From 939beb11a5ccf58d7fe79a809a1b592c5c9143c0 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 11 Nov 2021 13:20:30 +0200 Subject: Add support for dynamic dependencies in ad hoc Buildscript recipes Specifically, add the new `depdb dyndep` builtin that can be used to extract dynamic dependencies from a program run or a file. For example: obje{hello.o}: cxx{hello} {{ s = $path($<[0]) depdb dyndep $cxx.poptions $cc.poptions --what=header --default-prereq-type=h -- $cxx.path $cxx.poptions $cc.poptions $cxx.mode -M -MG $s diag c++ ($<[0]) o = $path($>) $cxx.path $cxx.poptions $cc.poptions $cc.coptions $cxx.coptions $cxx.mode -o $o -c $s }} Currently only the `make` dependency format is supported. --- libbuild2/adhoc-rule-buildscript.cxx | 743 +++++++++++++++++------- libbuild2/adhoc-rule-buildscript.hxx | 12 + libbuild2/build/script/builtin-options.cxx | 701 +++++++++++++++++++++++ libbuild2/build/script/builtin-options.hxx | 456 +++++++++++++++ libbuild2/build/script/builtin-options.ixx | 338 +++++++++++ libbuild2/build/script/builtin.cli | 32 ++ libbuild2/build/script/parser.cxx | 871 ++++++++++++++++++++++++++--- libbuild2/build/script/parser.hxx | 79 ++- libbuild2/build/script/runner.hxx | 2 +- libbuild2/build/script/script.hxx | 11 +- libbuild2/build/script/types-parsers.cxx | 56 ++ libbuild2/build/script/types-parsers.hxx | 49 ++ libbuild2/buildfile | 79 ++- libbuild2/cc/compile-rule.cxx | 848 ++++------------------------ libbuild2/cc/compile-rule.hxx | 44 +- libbuild2/cc/utility.cxx | 57 -- libbuild2/cc/utility.hxx | 29 +- libbuild2/dyndep.cxx | 667 ++++++++++++++++++++++ libbuild2/dyndep.hxx | 168 ++++++ libbuild2/filesystem.cxx | 55 ++ libbuild2/filesystem.hxx | 29 + libbuild2/make-parser.cxx | 137 +++++ libbuild2/make-parser.hxx | 85 +++ libbuild2/make-parser.test.cxx | 90 +++ libbuild2/make-parser.test.testscript | 98 ++++ libbuild2/script/parser.cxx | 2 +- libbuild2/script/run.cxx | 225 +++++--- libbuild2/script/run.hxx | 15 +- 
libbuild2/script/script.hxx | 3 +- 29 files changed, 4718 insertions(+), 1263 deletions(-) create mode 100644 libbuild2/build/script/builtin-options.cxx create mode 100644 libbuild2/build/script/builtin-options.hxx create mode 100644 libbuild2/build/script/builtin-options.ixx create mode 100644 libbuild2/build/script/builtin.cli create mode 100644 libbuild2/build/script/types-parsers.cxx create mode 100644 libbuild2/build/script/types-parsers.hxx create mode 100644 libbuild2/dyndep.cxx create mode 100644 libbuild2/dyndep.hxx create mode 100644 libbuild2/make-parser.cxx create mode 100644 libbuild2/make-parser.hxx create mode 100644 libbuild2/make-parser.test.cxx create mode 100644 libbuild2/make-parser.test.testscript (limited to 'libbuild2') diff --git a/libbuild2/adhoc-rule-buildscript.cxx b/libbuild2/adhoc-rule-buildscript.cxx index 61b4cb2..78f87ae 100644 --- a/libbuild2/adhoc-rule-buildscript.cxx +++ b/libbuild2/adhoc-rule-buildscript.cxx @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include // path_perms(), auto_rmfile @@ -22,6 +23,110 @@ using namespace std; namespace build2 { + static inline void + hash_script_vars (sha256& cs, + const build::script::script& s, + const target& t, + names& storage) + { + context& ctx (t.ctx); + + for (const string& n: s.vars) + { + cs.append (n); + + lookup l; + + if (const variable* var = ctx.var_pool.find (n)) + l = t[var]; + + cs.append (!l.defined () ? '\x1' : l->null ? '\x2' : '\x3'); + + if (l) + { + storage.clear (); + names_view ns (reverse (*l, storage)); + + for (const name& n: ns) + to_checksum (cs, n); + } + } + } + + // How should we hash target and prerequisite sets ($> and $<)? We could + // hash them as target names (i.e., the same as the $>/< content) or as + // paths (only for path-based targets). While names feel more general, they + // are also more expensive to compute. And for path-based targets, path is + // generally a good proxy for the target name. 
Since the bulk of the ad hoc + // recipes will presumably be operating exclusively on path-based targets, + // let's do it both ways. + // + static inline void + hash_target (sha256& cs, const target& t, names& storage) + { + if (const path_target* pt = t.is_a ()) + cs.append (pt->path ().string ()); + else + { + storage.clear (); + t.as_name (storage); + for (const name& n: storage) + to_checksum (cs, n); + } + }; + + // The script can reference a program in one of four ways: + // + // 1. As an (imported) target (e.g., $cli) + // + // 2. As a process_path_ex (e.g., $cxx.path). + // + // 3. As a builtin (e.g., sed) + // + // 4. As a program path/name. + // + // When it comes to change tracking, there is nothing we can do for (4) (the + // user can track its environment manually with depdb-env) and there is + // nothing to do for (3) (assuming builtin semantics is stable/backwards- + // compatible). The (2) case is handled automatically by hashing all the + // variable values referenced by the script (see below), which in case of + // process_path_ex includes the checksums (both executable and environment), + // if available. + // + // This leaves the (1) case, which itself splits into two sub-cases: the + // target comes with the dependency information (e.g., imported from a + // project via an export stub) or it does not (e.g., imported as installed). + // We don't need to do anything extra for the first sub-case since the + // target's state/mtime can be relied upon like any other prerequisite. + // Which cannot be said about the second sub-case, where we rely on + // checksum that may be included as part of the target metadata. + // + // So what we are going to do is hash checksum metadata of every executable + // prerequisite target that has it (we do it here in order to include ad hoc + // prerequisites, which feels like the right thing to do; the user may mark + // tools as ad hoc in order to omit them from $<). 
+ // + static inline void + hash_prerequisite_target (sha256& cs, sha256& exe_cs, sha256& env_cs, + const target& pt, + names& storage) + { + hash_target (cs, pt, storage); + + if (const exe* et = pt.is_a ()) + { + if (const string* c = et->lookup_metadata ("checksum")) + { + exe_cs.append (*c); + } + + if (const strings* e = et->lookup_metadata ("environment")) + { + hash_environment (env_cs, *e); + } + } + } + bool adhoc_buildscript_rule:: recipe_text (const scope& s, const target_type& tt, @@ -113,6 +218,20 @@ namespace build2 perform_update_id) != actions.end (); } + struct adhoc_buildscript_rule::match_data + { + match_data (action a, const target& t, bool temp_dir) + : env (a, t, temp_dir) {} + + build::script::environment env; + build::script::default_runner run; + + path dd; + const scope* bs; + timestamp mt; + bool deferred_failure; + }; + bool adhoc_buildscript_rule:: match (action a, target& t, const string& h, match_extra& me) const { @@ -140,15 +259,17 @@ namespace build2 recipe adhoc_buildscript_rule:: apply (action a, - target& t, + target& xt, match_extra& me, const optional& d) const { + tracer trace ("adhoc_buildscript_rule::apply"); + // We don't support deadlines for any of these cases (see below). // if (d && (a.outer () || me.fallback || - (a == perform_update_id && t.is_a ()))) + (a == perform_update_id && xt.is_a ()))) return empty_recipe; // If this is an outer operation (e.g., update-for-test), then delegate to @@ -156,20 +277,20 @@ namespace build2 // if (a.outer ()) { - match_inner (a, t); + match_inner (a, xt); return execute_inner; } // Inject pattern's ad hoc group members, if any. // if (pattern != nullptr) - pattern->apply_adhoc_members (a, t, me); + pattern->apply_adhoc_members (a, xt, me); // Derive file names for the target and its ad hoc group members, if any. 
// if (a == perform_update_id || a == perform_clean_id) { - for (target* m (&t); m != nullptr; m = m->adhoc_member) + for (target* m (&xt); m != nullptr; m = m->adhoc_member) { if (auto* p = m->is_a ()) p->derive_path (); @@ -181,68 +302,306 @@ namespace build2 // We do it always instead of only if one of the targets is path-based in // case the recipe creates temporary files or some such. // - inject_fsdir (a, t); + const fsdir* dir (inject_fsdir (a, xt)); // Match prerequisites. // - match_prerequisite_members (a, t); + match_prerequisite_members (a, xt); // Inject pattern's prerequisites, if any. // if (pattern != nullptr) - pattern->apply_prerequisites (a, t, me); + pattern->apply_prerequisites (a, xt, me); // See if we are providing the standard clean as a fallback. // if (me.fallback) return &perform_clean_depdb; - if (a == perform_update_id && t.is_a ()) + // See if this is not update or not on a file-based target. + // + if (a != perform_update_id || !xt.is_a ()) + { + return [d, this] (action a, const target& t) + { + return default_action (a, t, d); + }; + } + + // See if this is the simple case with only static dependencies. + // + if (!script.depdb_dyndep) { return [this] (action a, const target& t) { return perform_update_file (a, t); }; } + + // This is a perform update on a file target with extraction of dynamic + // dependency information in the depdb preamble (depdb-dyndep). + // + // This means we may need to add additional prerequisites (or even target + // group members). We also have to save any such additional prerequisites + // in depdb so that we can check if any of them have changed on subsequent + // updates. So all this means that we have to take care of depdb here in + // apply() instead of perform_*() like we normally do. We also do things + // in slightly different order due to the restrictions imposed by the match + // phase. 
+ // + // Note that the C/C++ header dependency extraction is the canonical + // example and all this logic is based on the prior work in the cc module + // where you can often find more detailed rationale for some of the steps + // performed (like the fsdir update below). + // + context& ctx (xt.ctx); + + file& t (xt.as ()); + const path& tp (t.path ()); + + if (dir != nullptr) + fsdir_rule::perform_update_direct (a, t); + + // Because the depdb preamble can access $<, we have to blank out all the + // ad hoc prerequisites. Since we will still need them later, we "move" + // them to the auxiliary data member in prerequisite_target (which also + // means we cannot use the standard execute_prerequisites()). + // + auto& pts (t.prerequisite_targets[a]); + for (prerequisite_target& p: pts) + { + // Note that fsdir{} injected above is adhoc. + // + if (p.target != nullptr && p.adhoc) + { + p.data = reinterpret_cast (p.target); + p.target = nullptr; + } + } + + // NOTE: see the "static dependencies" version (with comments) below. + // + depdb dd (tp + ".d"); + + if (dd.expect (" 1") != nullptr) + l4 ([&]{trace << "rule mismatch forcing update of " << t;}); + + if (dd.expect (checksum) != nullptr) + l4 ([&]{trace << "recipe text change forcing update of " << t;}); + + if (!script.depdb_clear) + { + names storage; + + sha256 prq_cs, exe_cs, env_cs; + + for (const prerequisite_target& p: pts) + { + if (const target* pt = + (p.target != nullptr ? p.target : + p.data != 0 ? 
reinterpret_cast (p.data) : + nullptr)) + { + hash_prerequisite_target (prq_cs, exe_cs, env_cs, *pt, storage); + } + } + + { + sha256 cs; + hash_script_vars (cs, script, t, storage); + + if (dd.expect (cs.string ()) != nullptr) + l4 ([&]{trace << "recipe variable change forcing update of " << t;}); + } + + { + sha256 tcs; + for (const target* m (&t); m != nullptr; m = m->adhoc_member) + hash_target (tcs, *m, storage); + + if (dd.expect (tcs.string ()) != nullptr) + l4 ([&]{trace << "target set change forcing update of " << t;}); + + if (dd.expect (prq_cs.string ()) != nullptr) + l4 ([&]{trace << "prerequisite set change forcing update of " << t;}); + } + + { + if (dd.expect (exe_cs.string ()) != nullptr) + l4 ([&]{trace << "program checksum change forcing update of " << t;}); + + if (dd.expect (env_cs.string ()) != nullptr) + l4 ([&]{trace << "environment change forcing update of " << t;}); + } + } + + const scope& bs (t.base_scope ()); + + unique_ptr md ( + new match_data (a, t, script.depdb_preamble_temp_dir)); + + build::script::environment& env (md->env); + build::script::default_runner& run (md->run); + + run.enter (env, script.start_loc); + + // Run the first half of the preamble (before depdb-dyndep). + // + { + build::script::parser p (ctx); + p.execute_depdb_preamble (a, bs, t, env, script, run, dd); + } + + // Determine if we need to do an update based on the above checks. + // + bool update; + timestamp mt; + + if (dd.writing ()) + update = true; else { - return [d, this] (action a, const target& t) + if ((mt = t.mtime ()) == timestamp_unknown) + t.mtime (mt = mtime (tp)); // Cache. + + update = dd.mtime > mt; + } + + if (update) + mt = timestamp_nonexistent; + + // Update our prerequisite targets. While strictly speaking we only need + // to update those that are referenced by depdb-dyndep, communicating + // this is both tedious and error-prone. So we update them all. 
+ // + for (const prerequisite_target& p: pts) + { + if (const target* pt = + (p.target != nullptr ? p.target : + p.data != 0 ? reinterpret_cast (p.data) : nullptr)) { - return default_action (a, t, d); - }; + update = dyndep_rule::update ( + trace, a, *pt, update ? timestamp_unknown : mt) || update; + } + } + + // Run the second half of the preamble (depdb-dyndep commands) to extract + // dynamic dependencies. + // + // Note that this should be the last update to depdb (the invalidation + // order semantics). + // + bool deferred_failure (false); + { + build::script::parser p (ctx); + p.execute_depdb_preamble_dyndep (a, bs, t, + env, script, run, + dd, + update, + deferred_failure, + mt); } + + if (update && dd.reading () && !ctx.dry_run) + dd.touch = true; + + dd.close (); + md->dd = move (dd.path); + + // Pass on base scope and update/mtime. + // + md->bs = &bs; + md->mt = update ? timestamp_nonexistent : mt; + md->deferred_failure = deferred_failure; + + // @@ TMP: re-enable once recipe becomes move_only_function. + // +#if 0 + return [this, md = move (md)] (action a, const target& t) mutable + { + auto r (perform_update_file_dyndep (a, t, *md)); + md.reset (); // @@ TMP: is this really necessary (+mutable)? + return r; + }; +#else + t.data (move (md)); + return recipe ([this] (action a, const target& t) mutable + { + auto md (move (t.data> ())); + return perform_update_file_dyndep (a, t, *md); + }); +#endif } target_state adhoc_buildscript_rule:: - perform_update_file (action a, const target& xt) const + perform_update_file_dyndep (action a, const target& xt, match_data& md) const { - tracer trace ("adhoc_buildscript_rule::perform_update_file"); + tracer trace ("adhoc_buildscript_rule::perform_update_file_dyndep"); context& ctx (xt.ctx); const file& t (xt.as ()); const path& tp (t.path ()); - // How should we hash target and prerequisite sets ($> and $<)? 
We could - // hash them as target names (i.e., the same as the $>/< content) or as - // paths (only for path-based targets). While names feel more general, - // they are also more expensive to compute. And for path-based targets, - // path is generally a good proxy for the target name. Since the bulk of - // the ad hoc recipes will presumably be operating exclusively on - // path-based targets, let's do it both ways. + // While we've updated all our prerequisites in apply(), we still need to + // execute them here to keep the dependency counts straight. // - auto hash_target = [ns = names ()] (sha256& cs, const target& t) mutable + for (const prerequisite_target& p: t.prerequisite_targets[a]) { - if (const path_target* pt = t.is_a ()) - cs.append (pt->path ().string ()); - else + if (const target* pt = + (p.target != nullptr ? p.target : + p.data != 0 ? reinterpret_cast (p.data) : nullptr)) { - ns.clear (); - t.as_name (ns); - for (const name& n: ns) - to_checksum (cs, n); + target_state ts (execute_wait (a, *pt)); + assert (ts == target_state::unchanged || ts == target_state::changed); } - }; + } + + build::script::environment& env (md.env); + build::script::default_runner& run (md.run); + + // Force update in case of a deferred failure even if nothing changed. + // + if (md.mt != timestamp_nonexistent && !md.deferred_failure) + { + run.leave (env, script.end_loc); + return target_state::unchanged; + } + + // Sequence start time for mtime checks below. + // + timestamp start (!ctx.dry_run && depdb::mtime_check () + ? 
system_clock::now () + : timestamp_unknown); + + if (!ctx.dry_run || verb != 0) + { + if (execute_update_file (*md.bs, a, t, env, run, md.deferred_failure)) + ; + else + run.leave (env, script.end_loc); + } + else + run.leave (env, script.end_loc); + + timestamp now (system_clock::now ()); + + if (!ctx.dry_run) + depdb::check_mtime (start, md.dd, tp, now); + + t.mtime (now); + return target_state::changed; + } + + target_state adhoc_buildscript_rule:: + perform_update_file (action a, const target& xt) const + { + tracer trace ("adhoc_buildscript_rule::perform_update_file"); + + context& ctx (xt.ctx); + + const file& t (xt.as ()); + const path& tp (t.path ()); // Update prerequisites and determine if any of them render this target // out-of-date. @@ -250,6 +609,8 @@ namespace build2 timestamp mt (t.load_mtime ()); optional ps; + names storage; + sha256 prq_cs, exe_cs, env_cs; { // This is essentially ps=execute_prerequisites(a, t, mt) which we @@ -262,7 +623,9 @@ namespace build2 wait_guard wg (ctx, busy, t[a].task_count); - for (const target*& pt: t.prerequisite_targets[a]) + auto& pts (t.prerequisite_targets[a]); + + for (const target*& pt: pts) { if (pt == nullptr) // Skipped. continue; @@ -279,7 +642,7 @@ namespace build2 wg.wait (); bool e (mt == timestamp_nonexistent); - for (prerequisite_target& p: t.prerequisite_targets[a]) + for (prerequisite_target& p: pts) { if (p == nullptr) continue; @@ -318,56 +681,8 @@ namespace build2 // As part of this loop calculate checksums that need to include ad // hoc prerequisites (unless the script tracks changes itself). // - if (script.depdb_clear) - continue; - - hash_target (prq_cs, pt); - - // The script can reference a program in one of four ways: - // - // 1. As an (imported) target (e.g., $cli) - // - // 2. As a process_path_ex (e.g., $cxx.path). - // - // 3. As a builtin (e.g., sed) - // - // 4. As a program path/name. 
- // - // When it comes to change tracking, there is nothing we can do for - // (4) (the user can track its environment manually with depdb-env) - // and there is nothing to do for (3) (assuming builtin semantics is - // stable/backwards-compatible). The (2) case is handled automatically - // by hashing all the variable values referenced by the script (see - // below), which in case of process_path_ex includes the checksums - // (both executable and environment), if available. - // - // This leaves the (1) case, which itself splits into two sub-cases: - // the target comes with the dependency information (e.g., imported - // from a project via an export stub) or it does not (e.g., imported - // as installed). We don't need to do anything extra for the first - // sub-case since the target's state/mtime can be relied upon like any - // other prerequisite. Which cannot be said about the second sub-case, - // where we reply on checksum that may be included as part of the - // target metadata. - // - // So what we are going to do is hash checksum metadata of every - // executable prerequisite target that has it (we do it here in order - // to include ad hoc prerequisites, which feels like the right thing - // to do; the user may mark tools as ad hoc in order to omit them from - // $<). - // - if (auto* et = pt.is_a ()) - { - if (auto* c = et->lookup_metadata ("checksum")) - { - exe_cs.append (*c); - } - - if (auto* e = et->lookup_metadata ("environment")) - { - hash_environment (env_cs, *e); - } - } + if (!script.depdb_clear) + hash_prerequisite_target (prq_cs, exe_cs, env_cs, pt, storage); } if (!e) @@ -379,6 +694,8 @@ namespace build2 // We use depdb to track changes to the script itself, input/output file // names, tools, etc. // + // NOTE: see the "dynamic dependencies" version above. + // depdb dd (tp + ".d"); // First should come the rule name/version. 
@@ -411,76 +728,53 @@ namespace build2 l4 ([&]{trace << "recipe text change forcing update of " << t;}); // Track the variables, targets, and prerequisites changes, unless the - // script doesn't track the dependency changes itself. - // - - // For each variable hash its name, undefined/null/non-null indicator, - // and the value if non-null. - // - // Note that this excludes the special $< and $> variables which we - // handle below. - // - // @@ TODO: maybe detect and decompose process_path_ex in order to - // properly attribute checksum and environment changes? + // script tracks the dependency changes itself. // if (!script.depdb_clear) { - sha256 cs; - names storage; - - for (const string& n: script.vars) + // For each variable hash its name, undefined/null/non-null indicator, + // and the value if non-null. + // + // Note that this excludes the special $< and $> variables which we + // handle below. + // + // @@ TODO: maybe detect and decompose process_path_ex in order to + // properly attribute checksum and environment changes? + // { - cs.append (n); - - lookup l; - - if (const variable* var = ctx.var_pool.find (n)) - l = t[var]; - - cs.append (!l.defined () ? '\x1' : l->null ? '\x2' : '\x3'); + sha256 cs; + hash_script_vars (cs, script, t, storage); - if (l) - { - storage.clear (); - names_view ns (reverse (*l, storage)); - - for (const name& n: ns) - to_checksum (cs, n); - } + if (dd.expect (cs.string ()) != nullptr) + l4 ([&]{trace << "recipe variable change forcing update of " << t;}); } - if (dd.expect (cs.string ()) != nullptr) - l4 ([&]{trace << "recipe variable change forcing update of " << t;}); - } - - // Target and prerequisite sets ($> and $<). - // - if (!script.depdb_clear) - { - sha256 tcs; - for (const target* m (&t); m != nullptr; m = m->adhoc_member) - hash_target (tcs, *m); + // Target and prerequisite sets ($> and $<). 
+ // + { + sha256 tcs; + for (const target* m (&t); m != nullptr; m = m->adhoc_member) + hash_target (tcs, *m, storage); - if (dd.expect (tcs.string ()) != nullptr) - l4 ([&]{trace << "target set change forcing update of " << t;}); + if (dd.expect (tcs.string ()) != nullptr) + l4 ([&]{trace << "target set change forcing update of " << t;}); - if (dd.expect (prq_cs.string ()) != nullptr) - l4 ([&]{trace << "prerequisite set change forcing update of " << t;}); - } + if (dd.expect (prq_cs.string ()) != nullptr) + l4 ([&]{trace << "prerequisite set change forcing update of " << t;}); + } - // Finally the programs and environment checksums. - // - if (!script.depdb_clear) - { - if (dd.expect (exe_cs.string ()) != nullptr) - l4 ([&]{trace << "program checksum change forcing update of " << t;}); + // Finally the programs and environment checksums. + // + { + if (dd.expect (exe_cs.string ()) != nullptr) + l4 ([&]{trace << "program checksum change forcing update of " << t;}); - if (dd.expect (env_cs.string ()) != nullptr) - l4 ([&]{trace << "environment change forcing update of " << t;}); + if (dd.expect (env_cs.string ()) != nullptr) + l4 ([&]{trace << "environment change forcing update of " << t;}); + } } const scope* bs (nullptr); - const scope* rs (nullptr); // Execute the custom dependency change tracking commands, if present. // @@ -507,20 +801,19 @@ namespace build2 } build::script::environment env (a, t, false /* temp_dir */); - build::script::default_runner r; + build::script::default_runner run; if (depdb_preamble) { bs = &t.base_scope (); - rs = bs->root_scope (); if (script.depdb_preamble_temp_dir) env.set_temp_dir_variable (); build::script::parser p (ctx); - r.enter (env, script.start_loc); - p.execute_depdb_preamble (*rs, *bs, env, script, r, dd); + run.enter (env, script.start_loc); + p.execute_depdb_preamble (a, *bs, t, env, script, run, dd); } // Update if depdb mismatch. @@ -539,104 +832,124 @@ namespace build2 // below). 
// if (depdb_preamble) - r.leave (env, script.end_loc); + run.leave (env, script.end_loc); return *ps; } if (!ctx.dry_run || verb != 0) { - // Prepare to executing the script diag line and/or body. - // - // Note that it doesn't make much sense to use the temporary directory - // variable ($~) in the 'diag' builtin call, so we postpone setting it - // until the script body execution, that can potentially be omitted. + // Prepare to execute the script diag line and/or body. // if (bs == nullptr) - { bs = &t.base_scope (); - rs = bs->root_scope (); + + if (execute_update_file (*bs, a, t, env, run)) + { + if (!ctx.dry_run) + dd.check_mtime (tp); } + else if (depdb_preamble) + run.leave (env, script.end_loc); + } + else if (depdb_preamble) + run.leave (env, script.end_loc); - build::script::parser p (ctx); + t.mtime (system_clock::now ()); + return target_state::changed; + } - if (verb == 1) + bool adhoc_buildscript_rule:: + execute_update_file (const scope& bs, + action, const file& t, + build::script::environment& env, + build::script::default_runner& run, + bool deferred_failure) const + { + context& ctx (t.ctx); + + const scope& rs (*bs.root_scope ()); + + // Note that it doesn't make much sense to use the temporary directory + // variable ($~) in the 'diag' builtin call, so we postpone setting it + // until the script body execution, that can potentially be omitted. + // + build::script::parser p (ctx); + + if (verb == 1) + { + if (script.diag_line) { - if (script.diag_line) - { - text << p.execute_special (*rs, *bs, env, *script.diag_line); - } - else - { - // @@ TODO (and below): - // - // - we are printing target, not source (like in most other places) - // - // - printing of ad hoc target group (the {hxx cxx}{foo} idea) - // - // - if we are printing prerequisites, should we print all of them - // (including tools)? 
- // - text << *script.diag_name << ' ' << t; - } + text << p.execute_special (rs, bs, env, *script.diag_line); } - - if (!ctx.dry_run || verb >= 2) + else { - // On failure remove the target files that may potentially exist but - // be invalid. + // @@ TODO (and in default_action() below): // - small_vector rms; + // - we are printing target, not source (like in most other places) + // + // - printing of ad hoc target group (the {hxx cxx}{foo} idea) + // + // - if we are printing prerequisites, should we print all of them + // (including tools)? + // + text << *script.diag_name << ' ' << t; + } + } - if (!ctx.dry_run) + if (!ctx.dry_run || verb >= 2) + { + // On failure remove the target files that may potentially exist but + // be invalid. + // + small_vector rms; + + if (!ctx.dry_run) + { + for (const target* m (&t); m != nullptr; m = m->adhoc_member) { - for (const target* m (&t); m != nullptr; m = m->adhoc_member) - { - if (auto* f = m->is_a ()) - rms.emplace_back (f->path ()); - } + if (auto* f = m->is_a ()) + rms.emplace_back (f->path ()); } + } - if (script.body_temp_dir && !script.depdb_preamble_temp_dir) - env.set_temp_dir_variable (); + if (script.body_temp_dir && !script.depdb_preamble_temp_dir) + env.set_temp_dir_variable (); - p.execute_body (*rs, *bs, env, script, r, !depdb_preamble); + p.execute_body (rs, bs, env, script, run, script.depdb_preamble.empty ()); - if (!ctx.dry_run) - { - // If this is an executable, let's be helpful to the user and set - // the executable bit on POSIX. 
- // -#ifndef _WIN32 - auto chmod = [] (const path& p) - { - path_perms (p, - (path_perms (p) | - permissions::xu | - permissions::xg | - permissions::xo)); - }; - - for (const target* m (&t); m != nullptr; m = m->adhoc_member) - { - if (auto* p = m->is_a ()) - chmod (p->path ()); - } -#endif - dd.check_mtime (tp); + if (!ctx.dry_run) + { + if (deferred_failure) + fail << "expected error exit status from recipe body"; - for (auto& rm: rms) - rm.cancel (); + // If this is an executable, let's be helpful to the user and set + // the executable bit on POSIX. + // +#ifndef _WIN32 + auto chmod = [] (const path& p) + { + path_perms (p, + (path_perms (p) | + permissions::xu | + permissions::xg | + permissions::xo)); + }; + + for (const target* m (&t); m != nullptr; m = m->adhoc_member) + { + if (auto* p = m->is_a ()) + chmod (p->path ()); } +#endif + for (auto& rm: rms) + rm.cancel (); } - else if (depdb_preamble) - r.leave (env, script.end_loc); - } - else if (depdb_preamble) - r.leave (env, script.end_loc); - t.mtime (system_clock::now ()); - return target_state::changed; + return true; + } + else + return false; } target_state adhoc_buildscript_rule:: @@ -666,7 +979,7 @@ namespace build2 } else { - // @@ TODO: as above + // @@ TODO: as above (execute_update_file()). 
// text << *script.diag_name << ' ' << t; } diff --git a/libbuild2/adhoc-rule-buildscript.hxx b/libbuild2/adhoc-rule-buildscript.hxx index 7f9c10a..51d37d4 100644 --- a/libbuild2/adhoc-rule-buildscript.hxx +++ b/libbuild2/adhoc-rule-buildscript.hxx @@ -38,6 +38,18 @@ namespace build2 target_state perform_update_file (action, const target&) const; + struct match_data; + + target_state + perform_update_file_dyndep (action, const target&, match_data&) const; + + bool + execute_update_file (const scope&, + action a, const file&, + build::script::environment&, + build::script::default_runner&, + bool deferred_failure = false) const; + target_state default_action (action, const target&, const optional&) const; diff --git a/libbuild2/build/script/builtin-options.cxx b/libbuild2/build/script/builtin-options.cxx new file mode 100644 index 0000000..cf99b12 --- /dev/null +++ b/libbuild2/build/script/builtin-options.cxx @@ -0,0 +1,701 @@ +// -*- C++ -*- +// +// This file was generated by CLI, a command line interface +// compiler for C++. +// + +// Begin prologue. +// +#include +// +// End prologue. 
+ +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace build2 +{ + namespace build + { + namespace script + { + namespace cli + { + // unknown_option + // + unknown_option:: + ~unknown_option () throw () + { + } + + void unknown_option:: + print (::std::ostream& os) const + { + os << "unknown option '" << option ().c_str () << "'"; + } + + const char* unknown_option:: + what () const throw () + { + return "unknown option"; + } + + // unknown_argument + // + unknown_argument:: + ~unknown_argument () throw () + { + } + + void unknown_argument:: + print (::std::ostream& os) const + { + os << "unknown argument '" << argument ().c_str () << "'"; + } + + const char* unknown_argument:: + what () const throw () + { + return "unknown argument"; + } + + // missing_value + // + missing_value:: + ~missing_value () throw () + { + } + + void missing_value:: + print (::std::ostream& os) const + { + os << "missing value for option '" << option ().c_str () << "'"; + } + + const char* missing_value:: + what () const throw () + { + return "missing option value"; + } + + // invalid_value + // + invalid_value:: + ~invalid_value () throw () + { + } + + void invalid_value:: + print (::std::ostream& os) const + { + os << "invalid value '" << value ().c_str () << "' for option '" + << option ().c_str () << "'"; + + if (!message ().empty ()) + os << ": " << message ().c_str (); + } + + const char* invalid_value:: + what () const throw () + { + return "invalid option value"; + } + + // eos_reached + // + void eos_reached:: + print (::std::ostream& os) const + { + os << what (); + } + + const char* eos_reached:: + what () const throw () + { + return "end of argument stream reached"; + } + + // scanner + // + scanner:: + ~scanner () + { + } + + // argv_scanner + // + bool argv_scanner:: + more () + { + return i_ < argc_; + } + + const char* argv_scanner:: + peek () + { + if (i_ < argc_) + return argv_[i_]; + else + throw eos_reached (); + } + + const 
char* argv_scanner:: + next () + { + if (i_ < argc_) + { + const char* r (argv_[i_]); + + if (erase_) + { + for (int i (i_ + 1); i < argc_; ++i) + argv_[i - 1] = argv_[i]; + + --argc_; + argv_[argc_] = 0; + } + else + ++i_; + + ++start_position_; + return r; + } + else + throw eos_reached (); + } + + void argv_scanner:: + skip () + { + if (i_ < argc_) + { + ++i_; + ++start_position_; + } + else + throw eos_reached (); + } + + std::size_t argv_scanner:: + position () + { + return start_position_; + } + + // vector_scanner + // + bool vector_scanner:: + more () + { + return i_ < v_.size (); + } + + const char* vector_scanner:: + peek () + { + if (i_ < v_.size ()) + return v_[i_].c_str (); + else + throw eos_reached (); + } + + const char* vector_scanner:: + next () + { + if (i_ < v_.size ()) + return v_[i_++].c_str (); + else + throw eos_reached (); + } + + void vector_scanner:: + skip () + { + if (i_ < v_.size ()) + ++i_; + else + throw eos_reached (); + } + + std::size_t vector_scanner:: + position () + { + return start_position_ + i_; + } + + template + struct parser + { + static void + parse (X& x, bool& xs, scanner& s) + { + using namespace std; + + const char* o (s.next ()); + if (s.more ()) + { + string v (s.next ()); + istringstream is (v); + if (!(is >> x && is.peek () == istringstream::traits_type::eof ())) + throw invalid_value (o, v); + } + else + throw missing_value (o); + + xs = true; + } + }; + + template <> + struct parser + { + static void + parse (bool& x, scanner& s) + { + s.next (); + x = true; + } + }; + + template <> + struct parser + { + static void + parse (std::string& x, bool& xs, scanner& s) + { + const char* o (s.next ()); + + if (s.more ()) + x = s.next (); + else + throw missing_value (o); + + xs = true; + } + }; + + template + struct parser > + { + static void + parse (std::pair& x, bool& xs, scanner& s) + { + x.second = s.position (); + parser::parse (x.first, xs, s); + } + }; + + template + struct parser > + { + static void + parse 
(std::vector& c, bool& xs, scanner& s) + { + X x; + bool dummy; + parser::parse (x, dummy, s); + c.push_back (x); + xs = true; + } + }; + + template + struct parser > + { + static void + parse (std::set& c, bool& xs, scanner& s) + { + X x; + bool dummy; + parser::parse (x, dummy, s); + c.insert (x); + xs = true; + } + }; + + template + struct parser > + { + static void + parse (std::map& m, bool& xs, scanner& s) + { + const char* o (s.next ()); + + if (s.more ()) + { + std::size_t pos (s.position ()); + std::string ov (s.next ()); + std::string::size_type p = ov.find ('='); + + K k = K (); + V v = V (); + std::string kstr (ov, 0, p); + std::string vstr (ov, (p != std::string::npos ? p + 1 : ov.size ())); + + int ac (2); + char* av[] = + { + const_cast (o), + 0 + }; + + bool dummy; + if (!kstr.empty ()) + { + av[1] = const_cast (kstr.c_str ()); + argv_scanner s (0, ac, av, false, pos); + parser::parse (k, dummy, s); + } + + if (!vstr.empty ()) + { + av[1] = const_cast (vstr.c_str ()); + argv_scanner s (0, ac, av, false, pos); + parser::parse (v, dummy, s); + } + + m[k] = v; + } + else + throw missing_value (o); + + xs = true; + } + }; + + template + void + thunk (X& x, scanner& s) + { + parser::parse (x.*M, s); + } + + template + void + thunk (X& x, scanner& s) + { + parser::parse (x.*M, x.*S, s); + } + } + } + } +} + +#include +#include + +namespace build2 +{ + namespace build + { + namespace script + { + // depdb_dep_options + // + + depdb_dep_options:: + depdb_dep_options () + : file_ (), + file_specified_ (false), + format_ (), + format_specified_ (false), + what_ (), + what_specified_ (false), + include_path_ (), + include_path_specified_ (false), + default_prereq_type_ (), + default_prereq_type_specified_ (false) + { + } + + bool depdb_dep_options:: + parse (int& argc, + char** argv, + bool erase, + ::build2::build::script::cli::unknown_mode opt, + ::build2::build::script::cli::unknown_mode arg) + { + ::build2::build::script::cli::argv_scanner s (argc, argv, 
erase); + bool r = _parse (s, opt, arg); + return r; + } + + bool depdb_dep_options:: + parse (int start, + int& argc, + char** argv, + bool erase, + ::build2::build::script::cli::unknown_mode opt, + ::build2::build::script::cli::unknown_mode arg) + { + ::build2::build::script::cli::argv_scanner s (start, argc, argv, erase); + bool r = _parse (s, opt, arg); + return r; + } + + bool depdb_dep_options:: + parse (int& argc, + char** argv, + int& end, + bool erase, + ::build2::build::script::cli::unknown_mode opt, + ::build2::build::script::cli::unknown_mode arg) + { + ::build2::build::script::cli::argv_scanner s (argc, argv, erase); + bool r = _parse (s, opt, arg); + end = s.end (); + return r; + } + + bool depdb_dep_options:: + parse (int start, + int& argc, + char** argv, + int& end, + bool erase, + ::build2::build::script::cli::unknown_mode opt, + ::build2::build::script::cli::unknown_mode arg) + { + ::build2::build::script::cli::argv_scanner s (start, argc, argv, erase); + bool r = _parse (s, opt, arg); + end = s.end (); + return r; + } + + bool depdb_dep_options:: + parse (::build2::build::script::cli::scanner& s, + ::build2::build::script::cli::unknown_mode opt, + ::build2::build::script::cli::unknown_mode arg) + { + bool r = _parse (s, opt, arg); + return r; + } + + typedef + std::map + _cli_depdb_dep_options_map; + + static _cli_depdb_dep_options_map _cli_depdb_dep_options_map_; + + struct _cli_depdb_dep_options_map_init + { + _cli_depdb_dep_options_map_init () + { + _cli_depdb_dep_options_map_["--file"] = + &::build2::build::script::cli::thunk< depdb_dep_options, path, &depdb_dep_options::file_, + &depdb_dep_options::file_specified_ >; + _cli_depdb_dep_options_map_["--format"] = + &::build2::build::script::cli::thunk< depdb_dep_options, string, &depdb_dep_options::format_, + &depdb_dep_options::format_specified_ >; + _cli_depdb_dep_options_map_["--what"] = + &::build2::build::script::cli::thunk< depdb_dep_options, string, &depdb_dep_options::what_, + 
&depdb_dep_options::what_specified_ >; + _cli_depdb_dep_options_map_["--include-path"] = + &::build2::build::script::cli::thunk< depdb_dep_options, dir_paths, &depdb_dep_options::include_path_, + &depdb_dep_options::include_path_specified_ >; + _cli_depdb_dep_options_map_["-I"] = + &::build2::build::script::cli::thunk< depdb_dep_options, dir_paths, &depdb_dep_options::include_path_, + &depdb_dep_options::include_path_specified_ >; + _cli_depdb_dep_options_map_["--default-prereq-type"] = + &::build2::build::script::cli::thunk< depdb_dep_options, string, &depdb_dep_options::default_prereq_type_, + &depdb_dep_options::default_prereq_type_specified_ >; + } + }; + + static _cli_depdb_dep_options_map_init _cli_depdb_dep_options_map_init_; + + bool depdb_dep_options:: + _parse (const char* o, ::build2::build::script::cli::scanner& s) + { + _cli_depdb_dep_options_map::const_iterator i (_cli_depdb_dep_options_map_.find (o)); + + if (i != _cli_depdb_dep_options_map_.end ()) + { + (*(i->second)) (*this, s); + return true; + } + + return false; + } + + bool depdb_dep_options:: + _parse (::build2::build::script::cli::scanner& s, + ::build2::build::script::cli::unknown_mode opt_mode, + ::build2::build::script::cli::unknown_mode arg_mode) + { + // Can't skip combined flags (--no-combined-flags). + // + assert (opt_mode != ::build2::build::script::cli::unknown_mode::skip); + + bool r = false; + bool opt = true; + + while (s.more ()) + { + const char* o = s.peek (); + + if (std::strcmp (o, "--") == 0) + { + opt = false; + s.skip (); + r = true; + continue; + } + + if (opt) + { + if (_parse (o, s)) + { + r = true; + continue; + } + + if (std::strncmp (o, "-", 1) == 0 && o[1] != '\0') + { + // Handle combined option values. 
+ // + std::string co; + if (const char* v = std::strchr (o, '=')) + { + co.assign (o, 0, v - o); + ++v; + + int ac (2); + char* av[] = + { + const_cast (co.c_str ()), + const_cast (v) + }; + + ::build2::build::script::cli::argv_scanner ns (0, ac, av); + + if (_parse (co.c_str (), ns)) + { + // Parsed the option but not its value? + // + if (ns.end () != 2) + throw ::build2::build::script::cli::invalid_value (co, v); + + s.next (); + r = true; + continue; + } + else + { + // Set the unknown option and fall through. + // + o = co.c_str (); + } + } + + // Handle combined flags. + // + char cf[3]; + { + const char* p = o + 1; + for (; *p != '\0'; ++p) + { + if (!((*p >= 'a' && *p <= 'z') || + (*p >= 'A' && *p <= 'Z') || + (*p >= '0' && *p <= '9'))) + break; + } + + if (*p == '\0') + { + for (p = o + 1; *p != '\0'; ++p) + { + std::strcpy (cf, "-"); + cf[1] = *p; + cf[2] = '\0'; + + int ac (1); + char* av[] = + { + cf + }; + + ::build2::build::script::cli::argv_scanner ns (0, ac, av); + + if (!_parse (cf, ns)) + break; + } + + if (*p == '\0') + { + // All handled. + // + s.next (); + r = true; + continue; + } + else + { + // Set the unknown option and fall through. + // + o = cf; + } + } + } + + switch (opt_mode) + { + case ::build2::build::script::cli::unknown_mode::skip: + { + s.skip (); + r = true; + continue; + } + case ::build2::build::script::cli::unknown_mode::stop: + { + break; + } + case ::build2::build::script::cli::unknown_mode::fail: + { + throw ::build2::build::script::cli::unknown_option (o); + } + } + + break; + } + } + + switch (arg_mode) + { + case ::build2::build::script::cli::unknown_mode::skip: + { + s.skip (); + r = true; + continue; + } + case ::build2::build::script::cli::unknown_mode::stop: + { + break; + } + case ::build2::build::script::cli::unknown_mode::fail: + { + throw ::build2::build::script::cli::unknown_argument (o); + } + } + + break; + } + + return r; + } + } + } +} + +// Begin epilogue. +// +// +// End epilogue. 
+ diff --git a/libbuild2/build/script/builtin-options.hxx b/libbuild2/build/script/builtin-options.hxx new file mode 100644 index 0000000..85d67b9 --- /dev/null +++ b/libbuild2/build/script/builtin-options.hxx @@ -0,0 +1,456 @@ +// -*- C++ -*- +// +// This file was generated by CLI, a command line interface +// compiler for C++. +// + +#ifndef LIBBUILD2_BUILD_SCRIPT_BUILTIN_OPTIONS_HXX +#define LIBBUILD2_BUILD_SCRIPT_BUILTIN_OPTIONS_HXX + +// Begin prologue. +// +// +// End prologue. + +#include +#include +#include +#include +#include + +#ifndef CLI_POTENTIALLY_UNUSED +# if defined(_MSC_VER) || defined(__xlC__) +# define CLI_POTENTIALLY_UNUSED(x) (void*)&x +# else +# define CLI_POTENTIALLY_UNUSED(x) (void)x +# endif +#endif + +namespace build2 +{ + namespace build + { + namespace script + { + namespace cli + { + class unknown_mode + { + public: + enum value + { + skip, + stop, + fail + }; + + unknown_mode (value); + + operator value () const + { + return v_; + } + + private: + value v_; + }; + + // Exceptions. 
+ // + + class exception: public std::exception + { + public: + virtual void + print (::std::ostream&) const = 0; + }; + + ::std::ostream& + operator<< (::std::ostream&, const exception&); + + class unknown_option: public exception + { + public: + virtual + ~unknown_option () throw (); + + unknown_option (const std::string& option); + + const std::string& + option () const; + + virtual void + print (::std::ostream&) const; + + virtual const char* + what () const throw (); + + private: + std::string option_; + }; + + class unknown_argument: public exception + { + public: + virtual + ~unknown_argument () throw (); + + unknown_argument (const std::string& argument); + + const std::string& + argument () const; + + virtual void + print (::std::ostream&) const; + + virtual const char* + what () const throw (); + + private: + std::string argument_; + }; + + class missing_value: public exception + { + public: + virtual + ~missing_value () throw (); + + missing_value (const std::string& option); + + const std::string& + option () const; + + virtual void + print (::std::ostream&) const; + + virtual const char* + what () const throw (); + + private: + std::string option_; + }; + + class invalid_value: public exception + { + public: + virtual + ~invalid_value () throw (); + + invalid_value (const std::string& option, + const std::string& value, + const std::string& message = std::string ()); + + const std::string& + option () const; + + const std::string& + value () const; + + const std::string& + message () const; + + virtual void + print (::std::ostream&) const; + + virtual const char* + what () const throw (); + + private: + std::string option_; + std::string value_; + std::string message_; + }; + + class eos_reached: public exception + { + public: + virtual void + print (::std::ostream&) const; + + virtual const char* + what () const throw (); + }; + + // Command line argument scanner interface. 
+ // + // The values returned by next() are guaranteed to be valid + // for the two previous arguments up until a call to a third + // peek() or next(). + // + // The position() function returns a monotonically-increasing + // number which, if stored, can later be used to determine the + // relative position of the argument returned by the following + // call to next(). Note that if multiple scanners are used to + // extract arguments from multiple sources, then the end + // position of the previous scanner should be used as the + // start position of the next. + // + class scanner + { + public: + virtual + ~scanner (); + + virtual bool + more () = 0; + + virtual const char* + peek () = 0; + + virtual const char* + next () = 0; + + virtual void + skip () = 0; + + virtual std::size_t + position () = 0; + }; + + class argv_scanner: public scanner + { + public: + argv_scanner (int& argc, + char** argv, + bool erase = false, + std::size_t start_position = 0); + + argv_scanner (int start, + int& argc, + char** argv, + bool erase = false, + std::size_t start_position = 0); + + int + end () const; + + virtual bool + more (); + + virtual const char* + peek (); + + virtual const char* + next (); + + virtual void + skip (); + + virtual std::size_t + position (); + + protected: + std::size_t start_position_; + int i_; + int& argc_; + char** argv_; + bool erase_; + }; + + class vector_scanner: public scanner + { + public: + vector_scanner (const std::vector&, + std::size_t start = 0, + std::size_t start_position = 0); + + std::size_t + end () const; + + void + reset (std::size_t start = 0, std::size_t start_position = 0); + + virtual bool + more (); + + virtual const char* + peek (); + + virtual const char* + next (); + + virtual void + skip (); + + virtual std::size_t + position (); + + private: + std::size_t start_position_; + const std::vector& v_; + std::size_t i_; + }; + + template + struct parser; + } + } + } +} + +#include + +namespace build2 +{ + namespace build + { + 
namespace script + { + class depdb_dep_options + { + public: + depdb_dep_options (); + + // Return true if anything has been parsed. + // + bool + parse (int& argc, + char** argv, + bool erase = false, + ::build2::build::script::cli::unknown_mode option = ::build2::build::script::cli::unknown_mode::fail, + ::build2::build::script::cli::unknown_mode argument = ::build2::build::script::cli::unknown_mode::stop); + + bool + parse (int start, + int& argc, + char** argv, + bool erase = false, + ::build2::build::script::cli::unknown_mode option = ::build2::build::script::cli::unknown_mode::fail, + ::build2::build::script::cli::unknown_mode argument = ::build2::build::script::cli::unknown_mode::stop); + + bool + parse (int& argc, + char** argv, + int& end, + bool erase = false, + ::build2::build::script::cli::unknown_mode option = ::build2::build::script::cli::unknown_mode::fail, + ::build2::build::script::cli::unknown_mode argument = ::build2::build::script::cli::unknown_mode::stop); + + bool + parse (int start, + int& argc, + char** argv, + int& end, + bool erase = false, + ::build2::build::script::cli::unknown_mode option = ::build2::build::script::cli::unknown_mode::fail, + ::build2::build::script::cli::unknown_mode argument = ::build2::build::script::cli::unknown_mode::stop); + + bool + parse (::build2::build::script::cli::scanner&, + ::build2::build::script::cli::unknown_mode option = ::build2::build::script::cli::unknown_mode::fail, + ::build2::build::script::cli::unknown_mode argument = ::build2::build::script::cli::unknown_mode::stop); + + // Option accessors and modifiers. 
+ // + const path& + file () const; + + path& + file (); + + void + file (const path&); + + bool + file_specified () const; + + void + file_specified (bool); + + const string& + format () const; + + string& + format (); + + void + format (const string&); + + bool + format_specified () const; + + void + format_specified (bool); + + const string& + what () const; + + string& + what (); + + void + what (const string&); + + bool + what_specified () const; + + void + what_specified (bool); + + const dir_paths& + include_path () const; + + dir_paths& + include_path (); + + void + include_path (const dir_paths&); + + bool + include_path_specified () const; + + void + include_path_specified (bool); + + const string& + default_prereq_type () const; + + string& + default_prereq_type (); + + void + default_prereq_type (const string&); + + bool + default_prereq_type_specified () const; + + void + default_prereq_type_specified (bool); + + // Implementation details. + // + protected: + bool + _parse (const char*, ::build2::build::script::cli::scanner&); + + private: + bool + _parse (::build2::build::script::cli::scanner&, + ::build2::build::script::cli::unknown_mode option, + ::build2::build::script::cli::unknown_mode argument); + + public: + path file_; + bool file_specified_; + string format_; + bool format_specified_; + string what_; + bool what_specified_; + dir_paths include_path_; + bool include_path_specified_; + string default_prereq_type_; + bool default_prereq_type_specified_; + }; + } + } +} + +#include + +// Begin epilogue. +// +// +// End epilogue. + +#endif // LIBBUILD2_BUILD_SCRIPT_BUILTIN_OPTIONS_HXX diff --git a/libbuild2/build/script/builtin-options.ixx b/libbuild2/build/script/builtin-options.ixx new file mode 100644 index 0000000..06575c8 --- /dev/null +++ b/libbuild2/build/script/builtin-options.ixx @@ -0,0 +1,338 @@ +// -*- C++ -*- +// +// This file was generated by CLI, a command line interface +// compiler for C++. +// + +// Begin prologue. 
+// +// +// End prologue. + +#include + +namespace build2 +{ + namespace build + { + namespace script + { + namespace cli + { + // unknown_mode + // + inline unknown_mode:: + unknown_mode (value v) + : v_ (v) + { + } + + // exception + // + inline ::std::ostream& + operator<< (::std::ostream& os, const exception& e) + { + e.print (os); + return os; + } + + // unknown_option + // + inline unknown_option:: + unknown_option (const std::string& option) + : option_ (option) + { + } + + inline const std::string& unknown_option:: + option () const + { + return option_; + } + + // unknown_argument + // + inline unknown_argument:: + unknown_argument (const std::string& argument) + : argument_ (argument) + { + } + + inline const std::string& unknown_argument:: + argument () const + { + return argument_; + } + + // missing_value + // + inline missing_value:: + missing_value (const std::string& option) + : option_ (option) + { + } + + inline const std::string& missing_value:: + option () const + { + return option_; + } + + // invalid_value + // + inline invalid_value:: + invalid_value (const std::string& option, + const std::string& value, + const std::string& message) + : option_ (option), + value_ (value), + message_ (message) + { + } + + inline const std::string& invalid_value:: + option () const + { + return option_; + } + + inline const std::string& invalid_value:: + value () const + { + return value_; + } + + inline const std::string& invalid_value:: + message () const + { + return message_; + } + + // argv_scanner + // + inline argv_scanner:: + argv_scanner (int& argc, + char** argv, + bool erase, + std::size_t sp) + : start_position_ (sp + 1), + i_ (1), + argc_ (argc), + argv_ (argv), + erase_ (erase) + { + } + + inline argv_scanner:: + argv_scanner (int start, + int& argc, + char** argv, + bool erase, + std::size_t sp) + : start_position_ (sp + static_cast (start)), + i_ (start), + argc_ (argc), + argv_ (argv), + erase_ (erase) + { + } + + inline int argv_scanner:: + 
end () const + { + return i_; + } + + // vector_scanner + // + inline vector_scanner:: + vector_scanner (const std::vector& v, + std::size_t i, + std::size_t sp) + : start_position_ (sp), v_ (v), i_ (i) + { + } + + inline std::size_t vector_scanner:: + end () const + { + return i_; + } + + inline void vector_scanner:: + reset (std::size_t i, std::size_t sp) + { + i_ = i; + start_position_ = sp; + } + } + } + } +} + +namespace build2 +{ + namespace build + { + namespace script + { + // depdb_dep_options + // + + inline const path& depdb_dep_options:: + file () const + { + return this->file_; + } + + inline path& depdb_dep_options:: + file () + { + return this->file_; + } + + inline void depdb_dep_options:: + file (const path& x) + { + this->file_ = x; + } + + inline bool depdb_dep_options:: + file_specified () const + { + return this->file_specified_; + } + + inline void depdb_dep_options:: + file_specified (bool x) + { + this->file_specified_ = x; + } + + inline const string& depdb_dep_options:: + format () const + { + return this->format_; + } + + inline string& depdb_dep_options:: + format () + { + return this->format_; + } + + inline void depdb_dep_options:: + format (const string& x) + { + this->format_ = x; + } + + inline bool depdb_dep_options:: + format_specified () const + { + return this->format_specified_; + } + + inline void depdb_dep_options:: + format_specified (bool x) + { + this->format_specified_ = x; + } + + inline const string& depdb_dep_options:: + what () const + { + return this->what_; + } + + inline string& depdb_dep_options:: + what () + { + return this->what_; + } + + inline void depdb_dep_options:: + what (const string& x) + { + this->what_ = x; + } + + inline bool depdb_dep_options:: + what_specified () const + { + return this->what_specified_; + } + + inline void depdb_dep_options:: + what_specified (bool x) + { + this->what_specified_ = x; + } + + inline const dir_paths& depdb_dep_options:: + include_path () const + { + return 
this->include_path_; + } + + inline dir_paths& depdb_dep_options:: + include_path () + { + return this->include_path_; + } + + inline void depdb_dep_options:: + include_path (const dir_paths& x) + { + this->include_path_ = x; + } + + inline bool depdb_dep_options:: + include_path_specified () const + { + return this->include_path_specified_; + } + + inline void depdb_dep_options:: + include_path_specified (bool x) + { + this->include_path_specified_ = x; + } + + inline const string& depdb_dep_options:: + default_prereq_type () const + { + return this->default_prereq_type_; + } + + inline string& depdb_dep_options:: + default_prereq_type () + { + return this->default_prereq_type_; + } + + inline void depdb_dep_options:: + default_prereq_type (const string& x) + { + this->default_prereq_type_ = x; + } + + inline bool depdb_dep_options:: + default_prereq_type_specified () const + { + return this->default_prereq_type_specified_; + } + + inline void depdb_dep_options:: + default_prereq_type_specified (bool x) + { + this->default_prereq_type_specified_ = x; + } + } + } +} + +// Begin epilogue. +// +// +// End epilogue. diff --git a/libbuild2/build/script/builtin.cli b/libbuild2/build/script/builtin.cli new file mode 100644 index 0000000..3ed3659 --- /dev/null +++ b/libbuild2/build/script/builtin.cli @@ -0,0 +1,32 @@ +// file : libbuild2/build/script/builtin.cli +// license : MIT; see accompanying LICENSE file + +include ; + +// Note that options in this file are undocumented because we generate neither +// the usage printing code nor man pages. Instead, they are documented in the +// manual. +// +namespace build2 +{ + namespace build + { + namespace script + { + // Pseudo-builtin options. + // + class depdb_dep_options + { + // Note that --byproduct, if any, must be the first option and is + // handled ad hoc, kind of as a sub-command. + // + path --file; // Read from file rather than stdin. + string --format; // Dependency format: make (default). 
+ string --what; // Dependency kind, e.g., "header". + dir_paths --include-path|-I; // Search paths for generated files. + string --default-prereq-type; // Default prerequisite type to use + // if none could be derived from ext. + }; + } + } +} diff --git a/libbuild2/build/script/parser.cxx b/libbuild2/build/script/parser.cxx index 217fa11..67dbf69 100644 --- a/libbuild2/build/script/parser.cxx +++ b/libbuild2/build/script/parser.cxx @@ -3,13 +3,22 @@ #include +#include // strcmp() +#include + #include +#include +#include #include #include +#include + +#include #include #include +#include using namespace std; using namespace butl; @@ -125,6 +134,8 @@ namespace build2 // Save the custom dependency change tracking lines, if present. // s.depdb_clear = depdb_clear_.has_value (); + if (depdb_dyndep_) + s.depdb_dyndep = depdb_dyndep_->second; s.depdb_preamble = move (depdb_preamble_); return s; @@ -487,7 +498,11 @@ namespace build2 next (t, tt); if (tt != type::word || - (v != "clear" && v != "hash" && v != "string" && v != "env")) + (v != "clear" && + v != "hash" && + v != "string" && + v != "env" && + v != "dyndep")) { fail (get_location (t)) << "expected 'depdb' builtin command instead of " << t; @@ -527,12 +542,39 @@ namespace build2 // the referenced variable list, since it won't be used. // depdb_clear_ = l; - save_line_ = nullptr; + save_line_ = nullptr; script_->vars.clear (); } else { + // Verify depdb-dyndep is last. + // + if (v == "dyndep") + { + // Note that for now we do not allow multiple dyndep calls. + // But we may want to relax this later (though alternating + // targets with prerequisites in depdb may be tricky -- maybe + // still only allow additional targets in the first call).
+ // + if (!depdb_dyndep_) + depdb_dyndep_ = make_pair (l, depdb_preamble_.size ()); + else + fail (l) << "multiple 'depdb dyndep' calls" << + info (depdb_dyndep_->first) << "previous call is here"; + +#if 0 + if (peek () == type::word && peeked ().value == "--byproduct") + ; +#endif + } + else + { + if (depdb_dyndep_) + fail (l) << "'depdb " << v << "' after 'depdb dyndep'" << + info (depdb_dyndep_->first) << "'depdb dyndep' call is here"; + } + // Move the script body to the end of the depdb preamble. // // Note that at this (pre-parsing) stage we cannot evaluate if @@ -885,114 +927,140 @@ namespace build2 } void parser:: - execute_depdb_preamble (const scope& rs, const scope& bs, - environment& e, const script& s, runner& r, - depdb& dd) + exec_depdb_preamble (action a, const scope& bs, const file& t, + environment& e, const script& s, runner& r, + lines_iterator begin, lines_iterator end, + depdb& dd, + bool* update, + bool* deferred_failure, + optional mt) { - tracer trace ("execute_depdb_preamble"); + tracer trace ("exec_depdb_preamble"); // The only valid lines in the depdb preamble are the depdb builtin // itself as well as the variable assignments, including via the set // builtin. - pre_exec (rs, bs, e, &s, &r); + pre_exec (*bs.root_scope (), bs, e, &s, &r); // Let's "wrap up" the objects we operate upon into the single object // to rely on "small function object" optimization. 
// struct { + tracer& trace; + + action a; + const scope& bs; + const file& t; + environment& env; const script& scr; + depdb& dd; - tracer& trace; - } ctx {e, s, dd, trace}; - - auto exec_cmd = [&ctx, this] - (token& t, - build2::script::token_type& tt, - size_t li, - bool /* single */, - const location& ll) + bool* update; + bool* deferred_failure; + optional mt; + + } data {trace, a, bs, t, e, s, dd, update, deferred_failure, mt}; + + auto exec_cmd = [this, &data] (token& t, + build2::script::token_type& tt, + size_t li, + bool /* single */, + const location& ll) { + // Note that we never reset the line index to zero (as we do in + // execute_body()) assuming that there are some script body + // commands to follow. + // if (tt == type::word && t.value == "depdb") { - names ns (exec_special (t, tt)); + next (t, tt); // This should have been enforced during pre-parsing. // - assert (!ns.empty ()); // ... + assert (tt == type::word); // ... - const string& cmd (ns[0].value); + string cmd (move (t.value)); - if (cmd == "hash") + if (cmd == "dyndep") { - sha256 cs; - for (auto i (ns.begin () + 1); i != ns.end (); ++i) // Skip . - to_checksum (cs, *i); - - if (ctx.dd.expect (cs.string ()) != nullptr) - l4 ([&] { - ctx.trace (ll) - << "'depdb hash' argument change forcing update of " - << ctx.env.target;}); + // Note: cast is safe since this is always executed in apply(). 
+ // + exec_depdb_dyndep (t, tt, + li, ll, + data.a, data.bs, const_cast (data.t), + data.dd, + *data.update, + *data.deferred_failure, + *data.mt); } - else if (cmd == "string") + else { - string s; - try - { - s = convert ( - names (make_move_iterator (ns.begin () + 1), - make_move_iterator (ns.end ()))); - } - catch (const invalid_argument& e) + names ns (exec_special (t, tt, true /* skip */)); + + if (cmd == "hash") { - fail (ll) << "invalid 'depdb string' argument: " << e; + sha256 cs; + for (const name& n: ns) + to_checksum (cs, n); + + if (data.dd.expect (cs.string ()) != nullptr) + l4 ([&] { + data.trace (ll) + << "'depdb hash' argument change forcing update of " + << data.t;}); } - - if (ctx.dd.expect (s) != nullptr) - l4 ([&] { - ctx.trace (ll) - << "'depdb string' argument change forcing update of " - << ctx.env.target;}); - } - else if (cmd == "env") - { - sha256 cs; - const char* pf ("invalid 'depdb env' argument: "); - - try + else if (cmd == "string") { - // Skip . - // - for (auto i (ns.begin () + 1); i != ns.end (); ++i) + string s; + try + { + s = convert (move (ns)); + } + catch (const invalid_argument& e) { - string vn (convert (move (*i))); - build2::script::verify_environment_var_name (vn, pf, ll); - hash_environment (cs, vn); + fail (ll) << "invalid 'depdb string' argument: " << e; } + + if (data.dd.expect (s) != nullptr) + l4 ([&] { + data.trace (ll) + << "'depdb string' argument change forcing update of " + << data.t;}); } - catch (const invalid_argument& e) + else if (cmd == "env") { - fail (ll) << pf << e; - } + sha256 cs; + const char* pf ("invalid 'depdb env' argument: "); - if (ctx.dd.expect (cs.string ()) != nullptr) - l4 ([&] { - ctx.trace (ll) - << "'depdb env' environment change forcing update of " - << ctx.env.target;}); + try + { + for (name& n: ns) + { + string vn (convert (move (n))); + build2::script::verify_environment_var_name (vn, pf, ll); + hash_environment (cs, vn); + } + } + catch (const invalid_argument& e) + { + fail 
(ll) << pf << e; + } + + if (data.dd.expect (cs.string ()) != nullptr) + l4 ([&] { + data.trace (ll) + << "'depdb env' environment change forcing update of " + << data.t;}); + } + else + assert (false); } - else - assert (false); } else { - // Note that we don't reset the line index to zero (as we do in - // execute_body()) assuming that there are some script body - // commands to follow. - // command_expr ce ( parse_command_line (t, static_cast (tt))); @@ -1006,7 +1074,7 @@ namespace build2 p.recall.string () == "set"; }) == ce.end ()) { - const replay_tokens& rt (ctx.scr.depdb_preamble.back ().tokens); + const replay_tokens& rt (data.scr.depdb_preamble.back ().tokens); assert (!rt.empty ()); fail (ll) << "disallowed command in depdb preamble" << @@ -1019,7 +1087,7 @@ namespace build2 } }; - exec_lines (s.depdb_preamble, exec_cmd); + exec_lines (begin, end, exec_cmd); } void parser:: @@ -1051,7 +1119,7 @@ namespace build2 } void parser:: - exec_lines (const lines& lns, + exec_lines (lines_iterator begin, lines_iterator end, const function& exec_cmd) { // Note that we rely on "small function object" optimization for the @@ -1090,25 +1158,23 @@ namespace build2 return runner_->run_if (*environment_, ce, li, ll); }; - build2::script::parser::exec_lines (lns.begin (), lns.end (), + build2::script::parser::exec_lines (begin, end, exec_set, exec_cmd, exec_if, environment_->exec_line, &environment_->var_pool); } names parser:: - exec_special (token& t, build2::script::token_type& tt, - bool omit_builtin) + exec_special (token& t, build2::script::token_type& tt, bool skip_first) { - if (omit_builtin) + if (skip_first) { assert (tt != type::newline && tt != type::eos); - next (t, tt); } return tt != type::newline && tt != type::eos - ? parse_names (t, tt, pattern_mode::expand) + ? 
parse_names (t, tt, pattern_mode::ignore) : names (); } @@ -1134,6 +1200,649 @@ namespace build2 return r; } + void parser:: + exec_depdb_dyndep (token& lt, build2::script::token_type& ltt, + size_t li, const location& ll, + action a, const scope& bs, file& t, + depdb& dd, + bool& update, + bool& deferred_failure, + timestamp mt) + { + tracer trace ("exec_depdb_dyndep"); + + context& ctx (t.ctx); + + // Similar approach to parse_env_builtin(). + // + depdb_dep_options ops; + bool prog (false); + { + auto& t (lt); + auto& tt (ltt); + + next (t, tt); // Skip 'dyndep' command. + + // Note that an option name and value can belong to different name + // chunks. That's why we parse the arguments in the chunking mode + // into the list up to the `--` separator and parse this list into + // options afterwards. Note that the `--` separator should be + // omitted if there is no program (i.e., additional dependency info + // is being read from one of the prerequisites). + // + strings args; + + names ns; // Reuse to reduce allocations. + while (tt != type::newline && tt != type::eos) + { + if (tt == type::word && t.value == "--") + { + prog = true; + break; + } + + location l (get_location (t)); + + if (!start_names (tt)) + fail (l) << "depdb dyndep: expected option or '--' separator " + << "instead of " << t; + + parse_names (t, tt, + ns, + pattern_mode::ignore, + true /* chunk */, + "depdb dyndep builtin argument", + nullptr); + + for (name& n: ns) + { + try + { + args.push_back (convert (move (n))); + } + catch (const invalid_argument&) + { + diag_record dr (fail (l)); + dr << "invalid string value "; + to_stream (dr.os, n, true /* quote */); + } + } + + ns.clear (); + } + + if (prog) + { + next (t, tt); // Skip '--'. + + if (tt == type::newline || tt == type::eos) + fail (t) << "depdb dyndep: expected program name instead of " + << t; + } + + // Parse the options. + // + // We would like to support both -I <dir> as well as -I<dir> forms + // for better compatibility.
The latter requires manual parsing. + // + try + { + for (cli::vector_scanner scan (args); scan.more (); ) + { + if (ops.parse (scan, cli::unknown_mode::stop) && !scan.more ()) + break; + + const char* a (scan.peek ()); + + // Handle -I + // + if (a[0] == '-' && a[1] == 'I') + { + try + { + ops.include_path ().push_back (dir_path (a + 2)); + } + catch (const invalid_path&) + { + throw cli::invalid_value ("-I", a + 2); + } + + scan.next (); + continue; + } + +#if 0 + // Handle --byproduct in the wrong place. + // + if (strcmp (a, "--byproduct") == 0) + fail (ll) << "depdb dyndep: --byproduct must be first option"; +#endif + + // Handle unknown option. + // + if (a[0] == '-') + throw cli::unknown_option (a); + + // Handle unexpected argument. + // + fail (ll) << "depdb dyndep: unexpected argument '" << a << "'"; + } + } + catch (const cli::exception& e) + { + fail (ll) << "depdb dyndep: " << e; + } + } + + // Get the default prerequisite type falling back to file{} if not + // specified. + // + // The reason one would want to specify it is to make sure different + // rules "resolve" the same dynamic prerequisites to the same targets. + // For example, a rule that implements custom C compilation for some + // translation unit would want to make sure it resolves extracted + // system headers to h{} targets analogous to the c module's rule. + // + const target_type* def_pt; + if (ops.default_prereq_type_specified ()) + { + const string& t (ops.default_prereq_type ()); + + def_pt = bs.find_target_type (t); + if (def_pt == nullptr) + fail (ll) << "unknown target type '" << t << "'"; + } + else + def_pt = &file::static_type; + + // This code is based on the prior work in the cc module (specifically + // extract_headers()) where you can often find more detailed rationale + // for some of the steps performed. + + using dyndep = dyndep_rule; + + // Build the maps lazily, only if/when needed. 
+ // + using prefix_map = dyndep::prefix_map; + using srcout_map = dyndep::srcout_map; + + function map_ext ( + [] (const scope& bs, const string& n, const string& e) + { + // @@ TODO: allow specifying base target types. + // + // Feels like the only reason one would want to specify base types + // is to tighten things up (as opposed to making some setup work) + // since it essentially restricts the set of registered target + // types that we will consider. + // + // Note also that these would be this project's target types while + // the file could be from another project. + // + return dyndep::map_extension (bs, n, e, nullptr); + + // @@ TODO: should we return something as fallback (file{}, + // def_pt)? Note: not the same semantics as enter_file()'s + // fallback. Feels like it could conceivably be different + // (e.g., h{} for fallback and hxx{} for some "unmappable" gen + // header). It looks like the "best" way currently is to define + // a custom target types for it (see moc{} in libQt5Core). + // + // Note also that we should only do this if bs is in our + // project. + }); + + // Don't we want to insert a "local"/prefixless mapping in case the + // user did not specify any -I's? But then will also need src-out + // remapping. So it will be equivalent to -I$out_base -I$src_base? But + // then it's not hard to add explicitly... 
+ // + function pfx_map; + + struct + { + tracer& trace; + const location& ll; + const depdb_dep_options& ops; + optional map; + } pfx_data {trace, ll, ops, nullopt}; + + if (!ops.include_path ().empty ()) + { + pfx_map = [this, &pfx_data] (action, + const scope& bs, + const target& t) -> const prefix_map& + { + if (!pfx_data.map) + { + pfx_data.map = prefix_map (); + + const scope& rs (*bs.root_scope ()); + + for (dir_path d: pfx_data.ops.include_path ()) + { + if (d.relative ()) + fail (pfx_data.ll) << "depdb dyndep: relative include " + << "search path " << d; + + if (!d.normalized (false /* canonical dir separators */)) + d.normalize (); + + // If we are not inside our project root, then ignore. + // + if (d.sub (rs.out_path ())) + dyndep::append_prefix ( + pfx_data.trace, *pfx_data.map, t, move (d)); + } + } + + return *pfx_data.map; + }; + } + + optional file; + enum class format {make} fmt (format::make); + command_expr cmd; + srcout_map so_map; + + // Parse the remainder of the command line as a program (which can be + // a pipe). If file is absent, then we save the command's stdout to a + // pipe. Otherwise, assume the command writes to file and add it to + // the cleanups. + // + // Note that MSVC /showIncludes sends its output to stderr (and so + // could do other broken tools). However, the user can always merge + // stderr to stdout (2>&1). + // + auto init_run = [this, &ctx, + &lt, &ltt, &ll, + &ops, prog, &file, &cmd, &so_map] () + { + // --format + // + if (ops.format_specified ()) + { + const string& f (ops.format ()); + + if (f != "make") + fail (ll) << "depdb dyndep: invalid --format option value '" + << f << "'"; + } + + // --file + // + if (ops.file_specified ()) + { + file = move (ops.file ()); + + if (file->relative ()) + fail (ll) << "depdb dyndep: relative path specified with --file"; + } + + // Populate the srcout map with the -I$out_base -I$src_base pairs.
+ // + { + dyndep::srcout_builder builder (ctx, so_map); + + for (dir_path d: ops.include_path ()) + builder.next (move (d)); + } + + if (prog) + { + cmd = parse_command_line (lt, static_cast (ltt)); + + // If the output goes to stdout, then this should be a single + // pipeline without any logical operators (&& or ||). + // + if (!file && cmd.size () != 1) + fail (ll) << "depdb dyndep: command with stdout output cannot " + << "contain logical operators"; + + // Note that we may need to run this command multiple times. The + // two potential issues here are the re-registration of the + // cleanups and re-use of the special files (stdin, stdout, etc; + // they include the line index in their names to avoid clashes + // between lines). + // + // Cleanups are not an issue, they will simply be replaced. And + // overriding the contents of the special files seems harmless and + // consistent with what would happen if the command redirects its + // output to a non-special file. + // + if (file) + environment_->clean ( + {build2::script::cleanup_type::always, *file}, + true /* implicit */); + } + else + { + // Assume file is one of the prerequisites. + // + if (!file) + fail (ll) << "depdb dyndep: program or --file expected"; + } + }; + + // Enter as a target, update, and add to the list of prerequisite + // targets a file. + // + const char* what (ops.what_specified () + ? ops.what ().c_str () + : "file"); + + size_t skip_count (0); + auto add = [this, &trace, what, + a, &bs, &t, + &map_ext, def_pt, &pfx_map, &so_map, + &dd, &skip_count] (path fp, + bool cache, + timestamp mt) -> optional + { + context& ctx (t.ctx); + + // We can only defer the failure if we will be running the recipe + // body.
+ // + auto fail = [this, what, &ctx] (const auto& f) -> optional + { + bool df (!ctx.match_only && !ctx.dry_run_option); + + diag_record dr; + dr << error << what << ' ' << f << " not found and no rule to " + << "generate it"; + + if (df) + dr << info << "failure deferred to recipe body diagnostics"; + + if (verb < 4) + dr << info << "re-run with --verbose=4 for more information"; + + if (df) + return nullopt; + else + dr << endf; + }; + + if (const build2::file* ft = dyndep::enter_file ( + trace, what, + a, bs, t, + move (fp), cache, false /* normalize */, + map_ext, *def_pt, pfx_map, so_map).first) + { + if (optional u = dyndep::inject_file ( + trace, what, + a, t, + *ft, mt, false /* fail */)) + { + if (!cache) + dd.expect (ft->path ()); + + skip_count++; + return *u; + } + else if (cache) + { + dd.write (); // Invalidate this line. + return true; + } + else + return fail (*ft); + } + else + return fail (fp); + }; + + // If things go wrong (and they often do in this area), give the user + // a bit extra context. + // + auto df = make_diag_frame ( + [this, &ll, &t] (const diag_record& dr) + { + if (verb != 0) + dr << info (ll) << "while extracting dynamic dependencies for " + << t; + }); + + // If nothing so far has invalidated the dependency database, then try + // the cached data before running the program. + // + bool cache (!update); + + for (bool restart (true), first_run (true); restart; cache = false) + { + restart = false; + + if (cache) + { + // If any, this is always the first run. + // + assert (skip_count == 0); + + // We should always end with a blank line. + // + for (;;) + { + string* l (dd.read ()); + + // If the line is invalid, run the compiler. + // + if (l == nullptr) + { + restart = true; + break; + } + + if (l->empty ()) // Done, nothing changed. 
+ return; + + if (optional r = add (path (move (*l)), true /*cache*/, mt)) + { + restart = *r; + + if (restart) + { + update = true; + l6 ([&]{trace << "restarting (cache)";}); + break; + } + } + else + { + // Trigger rebuild and mark as expected to fail. + // + update = true; + deferred_failure = true; + return; + } + } + } + else + { + if (first_run) + { + init_run (); + first_run = false; + } + else if (!prog) + { + fail (ll) << "generated " << what << " without program to retry"; + } + + // Save the timestamp just before we run the command. If we depend + // on any file that has been updated since, then we should assume + // we have "seen" the old copy and restart. + // + timestamp rmt (prog ? system_clock::now () : mt); + + // Run the command if any and reduce outputs to common istream. + // + // Note that the resulting stream should tolerate partial read. + // + // While reading the entire stdout into a string is not the most + // efficient way to do it, this does simplify things quite a bit, + // not least of which is not having to parse the output before + // knowing the program exit status. + // + istringstream iss; + if (prog) + { + string s; + build2::script::run (*environment_, + cmd, + li, + ll, + !file ? &s : nullptr); + + if (!file) + { + iss.str (move (s)); + iss.exceptions (istream::badbit); + } + } + + ifdstream ifs (ifdstream::badbit); + if (file) + try + { + ifs.open (*file); + } + catch (const io_error& e) + { + fail (ll) << "unable to open file " << *file << ": " << e; + } + + istream& is (file + ? static_cast (ifs) + : static_cast (iss)); + + const path_name& in (file + ? path_name (*file) + : path_name ("")); + + location il (in, 1); + + // The way we parse things is format-specific. + // + size_t skip (skip_count); + + switch (fmt) + { + case format::make: + { + using make_state = make_parser; + using make_type = make_parser::type; + + make_parser make; + + for (string l; !restart; ++il.line) // Reuse the buffer.
+ { + if (eof (getline (is, l))) + { + if (make.state != make_state::end) + fail (il) << "incomplete make dependency declaration"; + + break; + } + + size_t pos (0); + do + { + pair r; + { + auto df = make_diag_frame ( + [this, &l] (const diag_record& dr) + { + if (verb != 0) + dr << info << "while parsing make dependency " + << "declaration line '" << l << "'"; + }); + + r = make.next (l, pos, il, false /* strict */); + } + + if (r.second.empty ()) + continue; + + // @@ TODO: what should we do about targets? + // + // Note that if we take GCC as an example, things are + // quite messed up: by default it ignores -o and just + // takes the source file name and replaces the extension + // with a platform-appropriate object file extension. One + // can specify a custom target (or even multiple targets) + // with -MT or with -MQ (quoting). Though MinGW GCC still + // does not quote `:` with -MQ. So in this case it's + // definitely easier for the user to ignore the targets + // and just specify everything in the buildfile. + // + // On the other hand, other tools are likely to produce + // more sensible output (except perhaps for quoting). + // + // @@ Maybe in the lax mode we should only recognize `:` + // if it's separated on at least one side? + // + // Alternatively, we could detect Windows drives in + // paths and "handle" them (I believe this is what GNU + // make does). Maybe we should have three formats: + // make-lax, make, make-strict? + // + if (r.first == make_type::target) + continue; + + // Skip until where we left off. + // + if (skip != 0) + { + skip--; + continue; + } + + if (optional u = add (path (move (r.second)), + false /* cache */, + rmt)) + { + restart = *u; + + if (restart) + { + update = true; + l6 ([&]{trace << "restarting";}); + break; + } + } + else + { + // Trigger recompilation, mark as expected to fail, and + // bail out. 
+ // + update = true; + deferred_failure = true; + break; + } + } + while (pos != l.size ()); + + if (make.state == make_state::end || deferred_failure) + break; + } + + break; + } + } + + // Bail out early if we have deferred a failure. + // + if (deferred_failure) + return; + } + } + + // Add the terminating blank line (we are updating depdb). + // + dd.expect (""); + } + // When add a special variable don't forget to update lexer::word(). // bool parser:: diff --git a/libbuild2/build/script/parser.hxx b/libbuild2/build/script/parser.hxx index b737a13..da15509 100644 --- a/libbuild2/build/script/parser.hxx +++ b/libbuild2/build/script/parser.hxx @@ -8,7 +8,6 @@ #include #include -#include #include #include @@ -82,21 +81,51 @@ namespace build2 // initialize/clean up the environment before/after the script // execution. // + // Note: having both root and base scopes for testing (where we pass + // global scope for both). + // void execute_body (const scope& root, const scope& base, environment&, const script&, runner&, bool enter = true, bool leave = true); + // Execute the first or the second (dyndep) half of the depdb + // preamble. + // // Note that it's the caller's responsibility to make sure that the // runner's enter() function is called before the first preamble/body // command execution and leave() -- after the last command. // void - execute_depdb_preamble (const scope& root, const scope& base, - environment&, const script&, runner&, - depdb&); + execute_depdb_preamble (action a, const scope& base, const file& t, + environment& e, const script& s, runner& r, + depdb& dd) + { + auto b (s.depdb_preamble.begin ()); + exec_depdb_preamble ( + a, base, t, + e, s, r, + b, + (s.depdb_dyndep + ? 
b + *s.depdb_dyndep + : s.depdb_preamble.end ()), + dd); + } + void + execute_depdb_preamble_dyndep ( + action a, const scope& base, file& t, + environment& e, const script& s, runner& r, + depdb& dd, bool& update, bool& deferred_failure, timestamp mt) + { + exec_depdb_preamble ( + a, base, t, + e, s, r, + s.depdb_preamble.begin () + *s.depdb_dyndep, + s.depdb_preamble.end (), + dd, &update, &deferred_failure, mt); + } // Parse a special builtin line into names, performing the variable // and pattern expansions. If omit_builtin is true, then omit the @@ -115,12 +144,38 @@ namespace build2 pre_exec (const scope& root, const scope& base, environment&, const script*, runner*); + using lines_iterator = lines::const_iterator; + + void + exec_lines (lines_iterator, lines_iterator, + const function&); + void - exec_lines (const lines&, const function&); + exec_lines (const lines& l, const function& c) + { + exec_lines (l.begin (), l.end (), c); + } names - exec_special (token& t, build2::script::token_type& tt, - bool omit_builtin = true); + exec_special (token&, build2::script::token_type&, bool skip_first); + + void + exec_depdb_preamble (action, const scope& base, const file&, + environment&, const script&, runner&, + lines_iterator begin, lines_iterator end, + depdb&, + bool* update = nullptr, + bool* deferred_failure = nullptr, + optional mt = nullopt); + + void + exec_depdb_dyndep (token&, build2::script::token_type&, + size_t line_index, const location&, + action, const scope& base, file&, + depdb&, + bool& update, + bool& deferred_failure, + timestamp); // Helpers. // @@ -219,8 +274,14 @@ namespace build2 // depdb env - Track the environment variables change as a // hash. // - optional depdb_clear_; // 'depdb clear' location if any. - lines depdb_preamble_; // Note: excludes 'depdb clear'. + // depdb dyndep ... - Extract dynamic dependency information. + // Can only be the last depdb builtin call + // in the preamble. 
+ // + optional depdb_clear_; // depdb-clear location. + optional> + depdb_dyndep_; // depdb-dyndep location/position. + lines depdb_preamble_; // Note: excluding depdb-clear. // If present, the first impure function called in the body of the // script that performs update of a file-based target. diff --git a/libbuild2/build/script/runner.hxx b/libbuild2/build/script/runner.hxx index 431c446..558de9b 100644 --- a/libbuild2/build/script/runner.hxx +++ b/libbuild2/build/script/runner.hxx @@ -53,7 +53,7 @@ namespace build2 // Run command expressions. // // In dry-run mode don't run the expressions unless they are if- - // conditions or execute the set or exit builtins, but prints them at + // conditions or execute the set or exit builtins, but print them at // verbosity level 2 and up. // class default_runner: public runner diff --git a/libbuild2/build/script/script.hxx b/libbuild2/build/script/script.hxx index e11cb45..9d7567c 100644 --- a/libbuild2/build/script/script.hxx +++ b/libbuild2/build/script/script.hxx @@ -29,6 +29,10 @@ namespace build2 using build2::script::deadline; using build2::script::timeout; + // Forward declarations. + // + class default_runner; + // Notes: // // - Once parsed, the script can be executed in multiple threads with @@ -70,9 +74,10 @@ namespace build2 // The script's custom dependency change tracking lines (see the // script parser for details). // - bool depdb_clear; - lines_type depdb_preamble; - bool depdb_preamble_temp_dir = false; // True if references $~. + bool depdb_clear; + optional depdb_dyndep; // Position of first depdb-dyndep. + lines_type depdb_preamble; + bool depdb_preamble_temp_dir = false; // True if refs $~. 
location start_loc; location end_loc; diff --git a/libbuild2/build/script/types-parsers.cxx b/libbuild2/build/script/types-parsers.cxx new file mode 100644 index 0000000..9ecfa13 --- /dev/null +++ b/libbuild2/build/script/types-parsers.cxx @@ -0,0 +1,56 @@ +// file : libbuild2/build/script/types-parsers.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include // cli namespace + +namespace build2 +{ + namespace build + { + namespace script + { + namespace cli + { + template + static void + parse_path (T& x, scanner& s) + { + const char* o (s.next ()); + + if (!s.more ()) + throw missing_value (o); + + const char* v (s.next ()); + + try + { + x = T (v); + + if (x.empty ()) + throw invalid_value (o, v); + } + catch (const invalid_path&) + { + throw invalid_value (o, v); + } + } + + void parser:: + parse (path& x, bool& xs, scanner& s) + { + xs = true; + parse_path (x, s); + } + + void parser:: + parse (dir_path& x, bool& xs, scanner& s) + { + xs = true; + parse_path (x, s); + } + } + } + } +} diff --git a/libbuild2/build/script/types-parsers.hxx b/libbuild2/build/script/types-parsers.hxx new file mode 100644 index 0000000..a42dab7 --- /dev/null +++ b/libbuild2/build/script/types-parsers.hxx @@ -0,0 +1,49 @@ +// file : libbuild2/build/script/types-parsers.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +// CLI parsers, included into the generated source files. 
+// + +#ifndef LIBBUILD2_BUILD_SCRIPT_TYPES_PARSERS_HXX +#define LIBBUILD2_BUILD_SCRIPT_TYPES_PARSERS_HXX + +#include + +namespace build2 +{ + namespace build + { + namespace script + { + namespace cli + { + class scanner; + + template + struct parser; + + template <> + struct parser + { + static void + parse (path&, bool&, scanner&); + + static void + merge (path& b, const path& a) {b = a;} + }; + + template <> + struct parser + { + static void + parse (dir_path&, bool&, scanner&); + + static void + merge (dir_path& b, const dir_path& a) {b = a;} + }; + } + } + } +} + +#endif // LIBBUILD2_BUILD_SCRIPT_TYPES_PARSERS_HXX diff --git a/libbuild2/buildfile b/libbuild2/buildfile index 17003b5..f21be7b 100644 --- a/libbuild2/buildfile +++ b/libbuild2/buildfile @@ -32,7 +32,8 @@ lib{build2}: libul{build2}: \ libul{build2}: script/{hxx ixx txx cxx}{** -*-options -**.test...} \ script/{hxx ixx cxx}{builtin-options} -libul{build2}: build/{hxx ixx txx cxx}{** -**.test...} +libul{build2}: build/script/{hxx ixx txx cxx}{** -*-options -**.test...} \ + build/script/{hxx ixx cxx}{builtin-options} # Note that this won't work in libul{} since it's not installed. # @@ -216,38 +217,56 @@ else # Generated options parser. # -script/ +# @@ Consider generating common cli runtime namespace if adding more option +# files. Plus common types-parsers.?xx (which could also potentially be +# reused by the driver).
+# +if $cli.configured { - if $cli.configured + cli.options += --std c++11 -I $src_root --include-with-brackets \ +--generate-vector-scanner --generate-modifier --generate-specifier \ +--suppress-usage + + cli.cxx{*}: { - cli.cxx{builtin-options}: cli{builtin} - - cli.options += --std c++11 -I $src_root --include-with-brackets \ ---include-prefix libbuild2/script --guard-prefix LIBBUILD2_SCRIPT \ ---cli-namespace build2::script::cli --generate-vector-scanner \ ---generate-modifier --generate-specifier --suppress-usage - - cli.cxx{*}: - { - # Include the generated cli files into the distribution and don't remove - # them when cleaning in src (so that clean results in a state identical - # to distributed). But don't install their headers since they are only - # used internally in the testscript implementation. - # - dist = true - clean = ($src_root != $out_root) - install = false - - # We keep the generated code in the repository so copy it back to src in - # case of a forwarded configuration. - # - backlink = overwrite - } - } - else - # No install for the pre-generated case. + # Include the generated cli files into the distribution and don't remove + # them when cleaning in src (so that clean results in a state identical + # to distributed). But don't install their headers since they are only + # used internally in the testscript implementation. + # + dist = true + clean = ($src_root != $out_root) + install = false + + # We keep the generated code in the repository so copy it back to src in + # case of a forwarded configuration. 
# - hxx{builtin-options}@./ ixx{builtin-options}@./: install = false + backlink = overwrite + } + + script/cli.cxx{builtin-options}: script/cli{builtin} + { + cli.options += --cli-namespace build2::script::cli \ +--include-prefix libbuild2/script --guard-prefix LIBBUILD2_SCRIPT + } + + build/script/cli.cxx{builtin-options}: build/script/cli{builtin} + { + cli.options += --cli-namespace build2::build::script::cli \ +--include-prefix libbuild2/build/script --guard-prefix LIBBUILD2_BUILD_SCRIPT \ +--cxx-prologue "#include " \ +--generate-parse + } +} +else +{ + # No install for the pre-generated case. + # + script/hxx{builtin-options}@./ \ + script/ixx{builtin-options}@./: install = false + + build/script/hxx{builtin-options}@./ \ + build/script/ixx{builtin-options}@./: install = false } # Install into the libbuild2/ subdirectory of, say, /usr/include/ diff --git a/libbuild2/cc/compile-rule.cxx b/libbuild2/cc/compile-rule.cxx index 87fce90..525821f 100644 --- a/libbuild2/cc/compile-rule.cxx +++ b/libbuild2/cc/compile-rule.cxx @@ -16,6 +16,7 @@ #include #include // mtime() #include +#include #include @@ -707,7 +708,7 @@ namespace build2 void compile_rule:: append_library_prefixes (appended_libraries& ls, prefix_map& pm, const scope& bs, - action a, target& t, linfo li) const + action a, const target& t, linfo li) const { struct data { @@ -730,14 +731,21 @@ namespace build2 if (find (d.ls.begin (), d.ls.end (), &l) != d.ls.end ()) return false; - const variable& var ( - com - ? c_export_poptions - : (t == x - ? x_export_poptions - : l.ctx.var_pool[t + ".export.poptions"])); - - append_prefixes (d.pm, l, var); + // If this target does not belong to any project (e.g, an "imported as + // installed" library), then it can't possibly generate any headers + // for us. + // + if (const scope* rs = l.base_scope ().root_scope ()) + { + const variable& var ( + com + ? c_export_poptions + : (t == x + ? 
x_export_poptions + : l.ctx.var_pool[t + ".export.poptions"])); + + append_prefixes (d.pm, *rs, l, var); + } if (com) d.ls.push_back (&l); @@ -775,69 +783,6 @@ namespace build2 } } - // Update the target during the match phase. Return true if it has changed - // or if the passed timestamp is not timestamp_unknown and is older than - // the target. - // - // This function is used to make sure header dependencies are up to date. - // - // There would normally be a lot of headers for every source file (think - // all the system headers) and just calling execute_direct() on all of - // them can get expensive. At the same time, most of these headers are - // existing files that we will never be updating (again, system headers, - // for example) and the rule that will match them is the fallback - // file_rule. That rule has an optimization: it returns noop_recipe (which - // causes the target state to be automatically set to unchanged) if the - // file is known to be up to date. So we do the update "smartly". - // - static bool - update (tracer& trace, action a, const target& t, timestamp ts) - { - const path_target* pt (t.is_a ()); - - if (pt == nullptr) - ts = timestamp_unknown; - - target_state os (t.matched_state (a)); - - if (os == target_state::unchanged) - { - if (ts == timestamp_unknown) - return false; - else - { - // We expect the timestamp to be known (i.e., existing file). - // - timestamp mt (pt->mtime ()); - assert (mt != timestamp_unknown); - return mt > ts; - } - } - else - { - // We only want to return true if our call to execute() actually - // caused an update. In particular, the target could already have been - // in target_state::changed because of a dependency extraction run for - // some other source file. - // - // @@ MT perf: so we are going to switch the phase and execute for - // any generated header. 
- // - phase_switch ps (t.ctx, run_phase::execute); - target_state ns (execute_direct (a, t)); - - if (ns != os && ns != target_state::unchanged) - { - l6 ([&]{trace << "updated " << t - << "; old state " << os - << "; new state " << ns;}); - return true; - } - else - return ts != timestamp_unknown ? pt->newer (ts, ns) : false; - } - } - recipe compile_rule:: apply (action a, target& xt) const { @@ -1097,6 +1042,8 @@ namespace build2 md.symexport = l ? cast (l) : symexport; } + // NOTE: see similar code in adhoc_buildscript_rule::apply(). + // Make sure the output directory exists. // // Is this the right thing to do? It does smell a bit, but then we do @@ -1547,77 +1494,13 @@ namespace build2 } } - // Reverse-lookup target type(s) from extension. - // - small_vector compile_rule:: - map_extension (const scope& bs, const string& n, const string& e) const - { - // We will just have to try all of the possible ones, in the "most - // likely to match" order. - // - auto test = [&bs, &n, &e] (const target_type& tt) -> bool - { - // Call the extension derivation function. Here we know that it will - // only use the target type and name from the target key so we can - // pass bogus values for the rest. - // - target_key tk {&tt, nullptr, nullptr, &n, nullopt}; - - // This is like prerequisite search. - // - optional de (tt.default_extension (tk, bs, nullptr, true)); - - return de && *de == e; - }; - - small_vector r; - - for (const target_type* const* p (x_inc); *p != nullptr; ++p) - if (test (**p)) - r.push_back (*p); - - // Next try target types derived from any of the C-source types. 
- // - const target_type_map& ttm (bs.root_scope ()->root_extra->target_types); - - for (auto i (ttm.type_begin ()), e (ttm.type_end ()); i != e; ++i) - { - const target_type& dt (i->second); - - for (const target_type* const* p (x_inc); *p != nullptr; ++p) - { - const target_type& bt (**p); - - if (dt.is_a (bt)) - { - if (dt != bt && test (dt)) - r.push_back (&dt); - - break; - } - } - } - - return r; - } - void compile_rule:: - append_prefixes (prefix_map& m, const target& t, const variable& var) const + append_prefixes (prefix_map& m, + const scope& rs, const target& t, + const variable& var) const { tracer trace (x, "compile_rule::append_prefixes"); - // If this target does not belong to any project (e.g, an "imported as - // installed" library), then it can't possibly generate any headers for - // us. - // - const scope& bs (t.base_scope ()); - const scope* rs (bs.root_scope ()); - if (rs == nullptr) - return; - - const dir_path& out_base (t.dir); - const dir_path& out_root (rs->out_path ()); - if (auto l = t[var]) { const auto& v (cast (l)); @@ -1675,136 +1558,8 @@ namespace build2 // If we are not inside our project root, then ignore. // - if (!d.sub (out_root)) - continue; - - // If the target directory is a sub-directory of the include - // directory, then the prefix is the difference between the - // two. Otherwise, leave it empty. - // - // The idea here is to make this "canonical" setup work auto- - // magically: - // - // 1. We include all files with a prefix, e.g., . - // 2. The library target is in the foo/ sub-directory, e.g., - // /tmp/foo/. - // 3. The poptions variable contains -I/tmp. - // - dir_path p (out_base.sub (d) ? out_base.leaf (d) : dir_path ()); - - // We use the target's directory as out_base but that doesn't work - // well for targets that are stashed in subdirectories. So as a - // heuristics we are going to also enter the outer directories of - // the original prefix. 
It is, however, possible, that another -I - // option after this one will produce one of these outer prefixes as - // its original prefix in which case we should override it. - // - // So we are going to assign the original prefix priority value 0 - // (highest) and then increment it for each outer prefix. - // - auto enter = [&trace, &m] (dir_path p, dir_path d, size_t prio) - { - auto j (m.lower_bound (p)), e (m.end ()); - - if (j != e && j->first != p) - j = e; - - if (j == m.end ()) - { - if (verb >= 4) - trace << "new mapping for prefix '" << p << "'\n" - << " new mapping to " << d << " priority " << prio; - - m.emplace (move (p), prefix_value {move (d), prio}); - } - else if (p.empty ()) - { - // For prefixless we keep all the entries since for them we have - // an extra check (target must be explicitly spelled out in a - // buildfile). - // - if (verb >= 4) - trace << "additional mapping for prefix '" << p << "'\n" - << " new mapping to " << d << " priority " << prio; - - // Find the position where to insert according to the priority. - // For equal priorities we use the insertion order. - // - do - { - if (j->second.priority > prio) - break; - } - while (++j != e && j->first == p); - - m.emplace_hint (j, move (p), prefix_value {move (d), prio}); - } - else - { - prefix_value& v (j->second); - - // We used to reject duplicates but it seems this can be - // reasonably expected to work according to the order of the - // -I options. - // - // Seeing that we normally have more "specific" -I paths first, - // (so that we don't pick up installed headers, etc), we ignore - // it. 
- // - if (v.directory == d) - { - if (v.priority > prio) - v.priority = prio; - } - else if (v.priority <= prio) - { - if (verb >= 4) - trace << "ignoring mapping for prefix '" << p << "'\n" - << " existing mapping to " << v.directory - << " priority " << v.priority << '\n' - << " another mapping to " << d - << " priority " << prio; - } - else - { - if (verb >= 4) - trace << "overriding mapping for prefix '" << p << "'\n" - << " existing mapping to " << v.directory - << " priority " << v.priority << '\n' - << " new mapping to " << d - << " priority " << prio; - - v.directory = move (d); - v.priority = prio; - } - } - }; - -#if 1 - // Enter all outer prefixes, including prefixless. - // - // The prefixless part is fuzzy but seems to be doing the right - // thing ignoring/overriding-wise, at least in cases where one of - // the competing -I paths is a subdirectory of another. - // - for (size_t prio (0);; ++prio) - { - bool e (p.empty ()); - enter ((e ? move (p) : p), (e ? move (d) : d), prio); - if (e) - break; - p = p.directory (); - } -#else - size_t prio (0); - for (bool e (false); !e; ++prio) - { - dir_path n (p.directory ()); - e = n.empty (); - enter ((e ? move (p) : p), (e ? move (d) : d), prio); - p = move (n); - } -#endif + if (d.sub (rs.out_path ())) + append_prefix (trace, m, t, move (d)); } } } @@ -1812,15 +1567,16 @@ namespace build2 auto compile_rule:: build_prefix_map (const scope& bs, action a, - target& t, + const target& t, linfo li) const -> prefix_map { prefix_map pm; // First process our own. // - append_prefixes (pm, t, x_poptions); - append_prefixes (pm, t, c_poptions); + const scope& rs (*bs.root_scope ()); + append_prefixes (pm, rs, t, x_poptions); + append_prefixes (pm, rs, t, c_poptions); // Then process the include directories from prerequisite libraries. 
// @@ -1830,6 +1586,9 @@ namespace build2 return pm; } + // @@ TMP + // +#if 0 // Return the next make prerequisite starting from the specified // position and update position to point to the start of the // following prerequisite or l.size() if there are none left. @@ -1891,6 +1650,7 @@ namespace build2 return r; } +#endif // VC /showIncludes output. The first line is the file being compiled // (unless clang-cl; handled by our caller). Then we have the list of @@ -2312,7 +2072,7 @@ namespace build2 if (verb > 2) { diag_record dr; - dr << error << "header '" << f << "' not found and no " + dr << error << "header " << f << " not found and no " << "rule to generate it"; if (verb < 4) @@ -3031,355 +2791,56 @@ namespace build2 } #endif - // Enter as a target a header file. Depending on the cache flag, the file - // is assumed to either have come from the depdb cache or from the - // compiler run. - // - // Return the header target and an indication of whether it was remapped - // or NULL if the header does not exist and cannot be generated. In the - // latter case the passed header path is guaranteed to be still valid but - // might have been adjusted (e.g., normalized, etc). - // // Note: this used to be a lambda inside extract_headers() so refer to the // body of that function for the overall picture. // pair compile_rule:: enter_header (action a, const scope& bs, file& t, linfo li, path&& f, bool cache, bool norm, - optional& pfx_map, srcout_map& so_map) const + optional& pfx_map, + const srcout_map& so_map) const { tracer trace (x, "compile_rule::enter_header"); - // Find or maybe insert the target. The directory is only moved from if - // insert is true. Note that it must be normalized. - // - auto find = [&trace, &t, this] (dir_path&& d, - path&& f, - bool insert) -> const file* - { - // Split the file into its name part and extension. Here we can assume - // the name part is a valid filesystem name. 
- // - // Note that if the file has no extension, we record an empty - // extension rather than NULL (which would signify that the default - // extension should be added). - // - string e (f.extension ()); - string n (move (f).string ()); - - if (!e.empty ()) - n.resize (n.size () - e.size () - 1); // One for the dot. - - // See if this directory is part of any project and if so determine - // the target type. - // - // While at it also determine if this target is from the src or out - // tree of said project. - // - dir_path out; - - // It's possible the extension-to-target type mapping is ambiguous - // (usually because both C and X-language headers use the same .h - // extension). In this case we will first try to find one that matches - // an explicit target (similar logic to when insert is false). - // - small_vector tts; - - // Note that the path can be in out or src directory and the latter - // can be associated with multiple scopes. So strictly speaking we - // need to pick one that is "associated" with us. But that is still a - // TODO (see scope_map::find() for details) and so for now we just - // pick the first one (it's highly unlikely the source file extension - // mapping will differ based on the configuration). - // - { - const scope& bs (**t.ctx.scopes.find (d).first); - if (const scope* rs = bs.root_scope ()) - { - tts = map_extension (bs, n, e); - - if (!bs.out_eq_src () && d.sub (bs.src_path ())) - out = out_src (d, *rs); - } - } - - // If it is outside any project, or the project doesn't have such an - // extension, assume it is a plain old C header. - // - if (tts.empty ()) - { - // If the project doesn't "know" this extension then we can't - // possibly find an explicit target of this type. - // - if (!insert) - { - l6 ([&]{trace << "unknown header " << n << " extension '" - << e << "'";}); - return nullptr; - } - - tts.push_back (&h::static_type); - } - - // Find or insert target. 
- // - // Note that in case of the target type ambiguity we first try to find - // an explicit target that resolves this ambiguity. - // - const target* r (nullptr); - - if (!insert || tts.size () > 1) - { - // Note that we skip any target type-specific searches (like for an - // existing file) and go straight for the target object since we - // need to find the target explicitly spelled out. - // - // Also, it doesn't feel like we should be able to resolve an - // absolute path with a spelled-out extension to multiple targets. - // - for (const target_type* tt: tts) - { - if ((r = t.ctx.targets.find (*tt, d, out, n, e, trace)) != nullptr) - break; - else - l6 ([&]{trace << "no targe with target type " << tt->name;}); - } - - // Note: we can't do this because of the in-source builds where - // there won't be explicit targets for non-generated headers. - // - // This should be harmless, however, since in our world generated - // headers are normally spelled-out as explicit targets. And if not, - // we will still get an error, just a bit less specific. - // -#if 0 - if (r == nullptr && insert) - { - f = d / n; - if (!e.empty ()) - { - f += '.'; - f += e; - } - - diag_record dr (fail); - dr << "mapping of header " << f << " to target type is ambiguous"; - for (const target_type* tt: tts) - dr << info << "could be " << tt->name << "{}"; - dr << info << "spell-out its target to resolve this ambiguity"; - } -#endif - } - - // @@ OPT: move d, out, n - // - if (r == nullptr && insert) - r = &search (t, *tts[0], d, out, n, &e, nullptr); - - return static_cast (r); - }; - - // If it's not absolute then it either does not (yet) exist or is a - // relative ""-include (see init_args() for details). Reduce the second - // case to absolute. - // - // Note: we now always use absolute path to the translation unit so this - // no longer applies. But let's keep it for posterity. 
- // -#if 0 - if (f.relative () && rels.relative ()) - { - // If the relative source path has a directory component, make sure - // it matches since ""-include will always start with that (none of - // the compilers we support try to normalize this path). Failed that - // we may end up searching for a generated header in a random - // (working) directory. - // - const string& fs (f.string ()); - const string& ss (rels.string ()); - - size_t p (path::traits::rfind_separator (ss)); - - if (p == string::npos || // No directory. - (fs.size () > p + 1 && - path::traits::compare (fs.c_str (), p, ss.c_str (), p) == 0)) - { - path t (work / f); // The rels path is relative to work. - - if (exists (t)) - f = move (t); - } - } -#endif - - const file* pt (nullptr); - bool remapped (false); - - // If still relative then it does not exist. - // - if (f.relative ()) - { - // This is probably as often an error as an auto-generated file, so - // trace at level 4. - // - l4 ([&]{trace << "non-existent header '" << f << "'";}); - - f.normalize (); - - // The relative path might still contain '..' (e.g., ../foo.hxx; - // presumably ""-include'ed). We don't attempt to support auto- - // generated headers with such inclusion styles. - // - if (f.normalized ()) - { - if (!pfx_map) - pfx_map = build_prefix_map (bs, a, t, li); - - // First try the whole file. Then just the directory. - // - // @@ Has to be a separate map since the prefix can be the same as - // the file name. - // - // auto i (pfx_map->find (f)); - - // Find the most qualified prefix of which we are a sub-path. - // - if (!pfx_map->empty ()) - { - dir_path d (f.directory ()); - auto p (pfx_map->sup_range (d)); - - if (p.first != p.second) - { - // Note that we can only have multiple entries for the - // prefixless mapping. - // - dir_path pd; // Reuse. - for (auto i (p.first); i != p.second; ++i) - { - // Note: value in pfx_map is not necessarily canonical. 
- // - pd = i->second.directory; - pd.canonicalize (); - - l4 ([&]{trace << "try prefix '" << d << "' mapped to " << pd;}); - - // If this is a prefixless mapping, then only use it if we can - // resolve it to an existing target (i.e., it is explicitly - // spelled out in a buildfile). @@ Hm, I wonder why, it's not - // like we can generate any header without an explicit target. - // Maybe for diagnostics (i.e., we will actually try to build - // something there instead of just saying no mapping). - // - pt = find (pd / d, f.leaf (), !i->first.empty ()); - if (pt != nullptr) - { - f = pd / f; - l4 ([&]{trace << "mapped as auto-generated " << f;}); - break; - } - else - l4 ([&]{trace << "no explicit target in " << pd;}); - } - } - else - l4 ([&]{trace << "no prefix map entry for '" << d << "'";}); - } - else - l4 ([&]{trace << "prefix map is empty";}); - } - } - else + struct data { - // Normalize the path unless it comes from the depdb, in which case - // we've already done that (normally). This is also where we handle - // src-out remap (again, not needed if cached). - // - if (!cache || norm) - normalize_header (f); - - if (!cache) + linfo li; + optional& pfx_map; + } d {li, pfx_map}; + + // If it is outside any project, or the project doesn't have such an + // extension, assume it is a plain old C header. + // + return enter_file ( + trace, "header", + a, bs, t, + move (f), cache, norm, + [this] (const scope& bs, const string& n, const string& e) { - if (!so_map.empty ()) - { - // Find the most qualified prefix of which we are a sub-path. - // - auto i (so_map.find_sup (f)); - if (i != so_map.end ()) - { - // Ok, there is an out tree for this headers. Remap to a path - // from the out tree and see if there is a target for it. Note - // that the value in so_map is not necessarily canonical. - // - dir_path d (i->second); - d /= f.leaf (i->first).directory (); - d.canonicalize (); - - pt = find (move (d), f.leaf (), false); // d is not moved from. 
- - if (pt != nullptr) - { - path p (d / f.leaf ()); - l4 ([&]{trace << "remapping " << f << " to " << p;}); - f = move (p); - remapped = true; - } - } - } - } - - if (pt == nullptr) + return map_extension (bs, n, e, x_inc); + }, + h::static_type, + [this, &d] (action a, const scope& bs, const target& t) + -> const prefix_map& { - l6 ([&]{trace << "entering " << f;}); - pt = find (f.directory (), f.leaf (), true); - } - } + if (!d.pfx_map) + d.pfx_map = build_prefix_map (bs, a, t, d.li); - return make_pair (pt, remapped); + return *d.pfx_map; + }, + so_map); } - // Update and add to the list of prerequisite targets a header or header - // unit target. - // - // Return the indication of whether it has changed or, if the passed - // timestamp is not timestamp_unknown, is older than the target. If the - // header does not exists nor can be generated (no rule), then issue - // diagnostics and fail if the fail argument is true and return nullopt - // otherwise. - // // Note: this used to be a lambda inside extract_headers() so refer to the // body of that function for the overall picture. // optional compile_rule:: inject_header (action a, file& t, - const file& pt, timestamp mt, bool f /* fail */) const + const file& pt, timestamp mt, bool fail) const { tracer trace (x, "compile_rule::inject_header"); - // Even if failing we still use try_match() in order to issue consistent - // (with extract_headers() below) diagnostics (rather than the generic - // "not rule to update ..."). - // - if (!try_match (a, pt).first) - { - if (!f) - return nullopt; - - diag_record dr; - dr << fail << "header " << pt << " not found and no rule to " - << "generate it"; - - if (verb < 4) - dr << info << "re-run with --verbose=4 for more information"; - } - - bool r (update (trace, a, pt, mt)); - - // Add to our prerequisite target list. 
- // - t.prerequisite_targets[a].push_back (&pt); - - return r; + return inject_file (trace, "header", a, t, pt, mt, fail); } // Extract and inject header dependencies. Return the preprocessed source @@ -3414,16 +2875,6 @@ namespace build2 file_cache::entry psrc; bool puse (true); - // If things go wrong (and they often do in this area), give the user a - // bit extra context. - // - auto df = make_diag_frame ( - [&src](const diag_record& dr) - { - if (verb != 0) - dr << info << "while extracting header dependencies from " << src; - }); - // Preprocesor mode that preserves as much information as possible while // still performing inclusions. Also serves as a flag indicating whether // this compiler uses the separate preprocess and compile setup. @@ -3579,9 +3030,9 @@ namespace build2 // generator by end-users optional by shipping pre-generated headers. // // This is a nasty problem that doesn't seem to have a perfect solution - // (except, perhaps, C++ modules). So what we are going to do is try to - // rectify the situation by detecting and automatically remapping such - // mis-inclusions. It works as follows. + // (except, perhaps, C++ modules and/or module mapper). So what we are + // going to do is try to rectify the situation by detecting and + // automatically remapping such mis-inclusions. It works as follows. // // First we will build a map of src/out pairs that were specified with // -I. Here, for performance and simplicity, we will assume that they @@ -3594,10 +3045,7 @@ namespace build2 // case, then we calculate a corresponding header in the out tree and, // (this is the most important part), check if there is a target for // this header in the out tree. This should be fairly accurate and not - // require anything explicit from the user except perhaps for a case - // where the header is generated out of nothing (so there is no need to - // explicitly mention its target in the buildfile). But this probably - // won't be very common. 
+ // require anything explicit from the user. // // One tricky area in this setup are target groups: if the generated // sources are mentioned in the buildfile as a group, then there might @@ -3607,10 +3055,7 @@ namespace build2 // generated depending on the options (e.g., inline files might be // suppressed), headers are usually non-optional. // - // Note that we use path_map instead of dir_path_map to allow searching - // using path (file path). - // - srcout_map so_map; // path_map + srcout_map so_map; // Dynamic module mapper. // @@ -3690,17 +3135,13 @@ namespace build2 // Populate the src-out with the -I$out_base -I$src_base pairs. // { + srcout_builder builder (ctx, so_map); + // Try to be fast and efficient by reusing buffers as much as // possible. // string ds; - // Previous -I innermost scope if out_base plus the difference - // between the scope path and the -I path (normally empty). - // - const scope* s (nullptr); - dir_path p; - for (auto i (args.begin ()), e (args.end ()); i != e; ++i) { const char* o (*i); @@ -3725,7 +3166,7 @@ namespace build2 if (p == 0) { - s = nullptr; + builder.skip (); continue; } @@ -3758,68 +3199,14 @@ namespace build2 // if (!d.empty ()) { - // Ignore any paths containing '.', '..' components. Allow - // any directory separators though (think -I$src_root/foo - // on Windows). - // - if (d.absolute () && d.normalized (false)) - { - // If we have a candidate out_base, see if this is its - // src_base. - // - if (s != nullptr) - { - const dir_path& bp (s->src_path ()); - - if (d.sub (bp)) - { - if (p.empty () || d.leaf (bp) == p) - { - // We've got a pair. - // - so_map.emplace (move (d), s->out_path () / p); - s = nullptr; // Taken. - continue; - } - } - - // Not a pair. Fall through to consider as out_base. - // - s = nullptr; - } - - // See if this path is inside a project with an out-of- - // tree build and is in the out directory tree. 
- // - const scope& bs (ctx.scopes.find_out (d)); - if (bs.root_scope () != nullptr) - { - if (!bs.out_eq_src ()) - { - const dir_path& bp (bs.out_path ()); - - bool e; - if ((e = (d == bp)) || d.sub (bp)) - { - s = &bs; - if (e) - p.clear (); - else - p = d.leaf (bp); - } - } - } - } - else - s = nullptr; - - ds = move (d).string (); // Move the buffer out. + if (!builder.next (move (d))) + ds = move (d).string (); // Move the buffer back out. } else - s = nullptr; + builder.skip (); } else - s = nullptr; + builder.skip (); } } @@ -4159,15 +3546,12 @@ namespace build2 // to be inconvenient: some users like to re-run a failed build with // -s not to get "swamped" with errors. // - bool df (!ctx.match_only && !ctx.dry_run_option); - - const file* ht (enter_header (a, bs, t, li, - move (hp), cache, false /* norm */, - pfx_map, so_map).first); - if (ht == nullptr) + auto fail = [&ctx] (const auto& h) -> optional { + bool df (!ctx.match_only && !ctx.dry_run_option); + diag_record dr; - dr << error << "header '" << hp << "' not found and no rule to " + dr << error << "header " << h << " not found and no rule to " << "generate it"; if (df) @@ -4176,41 +3560,42 @@ namespace build2 if (verb < 4) dr << info << "re-run with --verbose=4 for more information"; - if (df) return nullopt; else dr << endf; - } + if (df) + return nullopt; + else + dr << endf; + }; - // If we are reading the cache, then it is possible the file has since - // been removed (think of a header in /usr/local/include that has been - // uninstalled and now we need to use one from /usr/include). This - // will lead to the match failure which we translate to a restart. - // - if (optional u = inject_header (a, t, *ht, mt, false /* fail */)) + if (const file* ht = enter_header (a, bs, t, li, + move (hp), cache, false /* norm */, + pfx_map, so_map).first) { - // Verify/add it to the dependency database. 
+ // If we are reading the cache, then it is possible the file has + // since been removed (think of a header in /usr/local/include that + // has been uninstalled and now we need to use one from + // /usr/include). This will lead to the match failure which we + // translate to a restart. // - if (!cache) - dd.expect (ht->path ()); - - skip_count++; - return *u; - } - else if (!cache) - { - diag_record dr; - dr << error << "header " << *ht << " not found and no rule to " - << "generate it"; - - if (df) - dr << info << "failure deferred to compiler diagnostics"; - - if (verb < 4) - dr << info << "re-run with --verbose=4 for more information"; + if (optional u = inject_header (a, t, *ht, mt, false /*fail*/)) + { + // Verify/add it to the dependency database. + // + if (!cache) + dd.expect (ht->path ()); - if (df) return nullopt; else dr << endf; + skip_count++; + return *u; + } + else if (cache) + { + dd.write (); // Invalidate this line. + return true; + } + else + return fail (*ht); } - - dd.write (); // Invalidate this line. - return true; + else + return fail (hp); }; // As above but for a header unit. Note that currently it is only used @@ -4233,7 +3618,7 @@ namespace build2 if (ht == nullptr) { diag_record dr; - dr << error << "header '" << hp << "' not found and no rule to " + dr << error << "header " << hp << " not found and no rule to " << "generate it"; if (df) @@ -4279,6 +3664,16 @@ namespace build2 const path* drmp (nullptr); // Points to drm.path () if active. + // If things go wrong (and they often do in this area), give the user a + // bit extra context. + // + auto df = make_diag_frame ( + [&src](const diag_record& dr) + { + if (verb != 0) + dr << info << "while extracting header dependencies from " << src; + }); + // If nothing so far has invalidated the dependency database, then try // the cached data before running the compiler. // @@ -4843,13 +4238,16 @@ namespace build2 if (second) { + // Skip the source file. 
+ // + make_parser::next (l, pos, true /* prereq */); second = false; - next_make (l, pos); // Skip the source file. } while (pos != l.size ()) { - string f (next_make (l, pos)); + string f ( + make_parser::next (l, pos, true /* prereq */).first); // Skip until where we left off. // diff --git a/libbuild2/cc/compile-rule.hxx b/libbuild2/cc/compile-rule.hxx index f573968..568c04b 100644 --- a/libbuild2/cc/compile-rule.hxx +++ b/libbuild2/cc/compile-rule.hxx @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -37,7 +38,8 @@ namespace build2 }; class LIBBUILD2_CC_SYMEXPORT compile_rule: public simple_rule, - virtual common + virtual common, + dyndep_rule { public: compile_rule (data&&); @@ -91,45 +93,21 @@ namespace build2 const scope&, action, const target&, linfo) const; - // Mapping of include prefixes (e.g., foo in ) for auto- - // generated headers to directories where they will be generated. - // - // We are using a prefix map of directories (dir_path_map) instead of - // just a map in order to also cover sub-paths (e.g., if - // we continue with the example). Specifically, we need to make sure we - // don't treat foobar as a sub-directory of foo. - // - // The priority is used to decide who should override whom. Lesser - // values are considered higher priority. Note that we can have multiple - // prefixless mapping (where priority is used to determine the order). - // See append_prefixes() for details. - // - // @@ The keys should be normalized. 
- // - struct prefix_value - { - dir_path directory; - size_t priority; - }; - using prefix_map = dir_path_multimap; + using prefix_map = dyndep_rule::prefix_map; + using srcout_map = dyndep_rule::srcout_map; void - append_prefixes (prefix_map&, const target&, const variable&) const; + append_prefixes (prefix_map&, + const scope&, const target&, + const variable&) const; void append_library_prefixes (appended_libraries&, prefix_map&, const scope&, - action, target&, linfo) const; + action, const target&, linfo) const; prefix_map - build_prefix_map (const scope&, action, target&, linfo) const; - - small_vector - map_extension (const scope&, const string&, const string&) const; - - // Src-to-out re-mapping. See extract_headers() for details. - // - using srcout_map = path_map; + build_prefix_map (const scope&, action, const target&, linfo) const; struct module_mapper_state; @@ -143,7 +121,7 @@ namespace build2 pair enter_header (action, const scope&, file&, linfo, path&&, bool, bool, - optional&, srcout_map&) const; + optional&, const srcout_map&) const; optional inject_header (action, file&, const file&, timestamp, bool) const; diff --git a/libbuild2/cc/utility.cxx b/libbuild2/cc/utility.cxx index ffe3e03..e02f85a 100644 --- a/libbuild2/cc/utility.cxx +++ b/libbuild2/cc/utility.cxx @@ -3,10 +3,6 @@ #include -#include - -using namespace std; - namespace build2 { namespace cc @@ -17,58 +13,5 @@ namespace build2 const dir_path module_build_dir (dir_path (module_dir) /= "build"); const dir_path module_build_modules_dir ( dir_path (module_build_dir) /= "modules"); - - void - normalize_header (path& f) - { - // Interestingly, on most paltforms and with most compilers (Clang on - // Linux being a notable exception) most system/compiler headers are - // already normalized. 
- // - path_abnormality a (f.abnormalities ()); - if (a != path_abnormality::none) - { - // While we can reasonably expect this path to exit, things do go - // south from time to time (like compiling under wine with file - // wlantypes.h included as WlanTypes.h). - // - try - { - // If we have any parent components, then we have to verify the - // normalized path matches realized. - // - path r; - if ((a & path_abnormality::parent) == path_abnormality::parent) - { - r = f; - r.realize (); - } - - try - { - f.normalize (); - - // Note that we might still need to resolve symlinks in the - // normalized path. - // - if (!r.empty () && f != r && path (f).realize () != r) - f = move (r); - } - catch (const invalid_path&) - { - assert (!r.empty ()); // Shouldn't have failed if no `..`. - f = move (r); // Fallback to realize. - } - } - catch (const invalid_path&) - { - fail << "invalid header path '" << f.string () << "'"; - } - catch (const system_error& e) - { - fail << "invalid header path '" << f.string () << "': " << e; - } - } - } } } diff --git a/libbuild2/cc/utility.hxx b/libbuild2/cc/utility.hxx index 42e53e3..6ba4a20 100644 --- a/libbuild2/cc/utility.hxx +++ b/libbuild2/cc/utility.hxx @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -51,29 +52,11 @@ namespace build2 // Normalize an absolute path to an existing header. // - // We used to just normalize the path but that could result in an invalid - // path (e.g., for some system/compiler headers on CentOS 7 with Clang - // 3.4) because of the symlinks (if a directory component is a symlink, - // then any following `..` are resolved relative to the target; see - // path::normalize() for background). - // - // Initially, to fix this, we realized (i.e., realpath(3)) it instead. 
- // But that turned out also not to be quite right since now we have all - // the symlinks resolved: conceptually it feels correct to keep the - // original header names since that's how the user chose to arrange things - // and practically this is how the compilers see/report them (e.g., the - // GCC module mapper). - // - // So now we have a pretty elaborate scheme where we try to use the - // normalized path if possible and fallback to realized. Normalized paths - // will work for situations where `..` does not cross symlink boundaries, - // which is the sane case. And for the insane case we only really care - // about out-of-project files (i.e., system/compiler headers). In other - // words, if you have the insane case inside your project, then you are on - // your own. - // - void - normalize_header (path&); + inline void + normalize_header (path& f) + { + normalize_external (f, "header"); + } } } diff --git a/libbuild2/dyndep.cxx b/libbuild2/dyndep.cxx new file mode 100644 index 0000000..51fa7bc --- /dev/null +++ b/libbuild2/dyndep.cxx @@ -0,0 +1,667 @@ +// file : libbuild2/dyndep.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace butl; + +namespace build2 +{ + bool dyndep_rule:: + update (tracer& trace, action a, const target& t, timestamp ts) + { + // In particular, this function is used to make sure header dependencies + // are up to date. + // + // There would normally be a lot of headers for every source file (think + // all the system headers) and just calling execute_direct() on all of + // them can get expensive. At the same time, most of these headers are + // existing files that we will never be updating (again, system headers, + // for example) and the rule that will match them is the fallback + // file_rule. 
That rule has an optimization: it returns noop_recipe (which + // causes the target state to be automatically set to unchanged) if the + // file is known to be up to date. So we do the update "smartly". + // + const path_target* pt (t.is_a ()); + + if (pt == nullptr) + ts = timestamp_unknown; + + target_state os (t.matched_state (a)); + + if (os == target_state::unchanged) + { + if (ts == timestamp_unknown) + return false; + else + { + // We expect the timestamp to be known (i.e., existing file). + // + timestamp mt (pt->mtime ()); + assert (mt != timestamp_unknown); + return mt > ts; + } + } + else + { + // We only want to return true if our call to execute() actually caused + // an update. In particular, the target could already have been in + // target_state::changed because of the dynamic dependency extraction + // run for some other target. + // + // @@ MT perf: so we are going to switch the phase and execute for + // any generated header. + // + phase_switch ps (t.ctx, run_phase::execute); + target_state ns (execute_direct (a, t)); + + if (ns != os && ns != target_state::unchanged) + { + l6 ([&]{trace << "updated " << t + << "; old state " << os + << "; new state " << ns;}); + return true; + } + else + return ts != timestamp_unknown ? pt->newer (ts, ns) : false; + } + } + + optional dyndep_rule:: + inject_file (tracer& trace, const char* what, + action a, target& t, + const file& pt, + timestamp mt, + bool f) + { + // Even if failing we still use try_match() in order to issue consistent + // (with other places) diagnostics (rather than the generic "not rule to + // update ..."). + // + if (!try_match (a, pt).first) + { + if (!f) + return nullopt; + + diag_record dr; + dr << fail << what << ' ' << pt << " not found and no rule to " + << "generate it"; + + if (verb < 4) + dr << info << "re-run with --verbose=4 for more information"; + } + + bool r (update (trace, a, pt, mt)); + + // Add to our prerequisite target list. 
+ // + t.prerequisite_targets[a].push_back (&pt); + + return r; + } + + // Reverse-lookup target type(s) from file name/extension. + // + // If the list of base target types is specified, then only these types and + // those derived from them are considered. Otherwise, any file-based type is + // considered but not the file type itself. + // + small_vector dyndep_rule:: + map_extension (const scope& bs, + const string& n, const string& e, + const target_type* const* tts) + { + // We will just have to try all of the possible ones, in the "most + // likely to match" order. + // + auto test = [&bs, &n, &e] (const target_type& tt) -> bool + { + if (tt.default_extension != nullptr) + { + // Call the extension derivation function. Here we know that it will + // only use the target type and name from the target key so we can + // pass bogus values for the rest. + // + target_key tk {&tt, nullptr, nullptr, &n, nullopt}; + + // This is like prerequisite search. + // + optional de (tt.default_extension (tk, bs, nullptr, true)); + + return de && *de == e; + } + + return false; + }; + + small_vector r; + + if (tts != nullptr) + { + // @@ What if these types are not known by this project? Maybe this + // should just be unified with the below loop? Need to make sure + // we don't rely on the order in which they are returned. + // + for (const target_type* const* p (tts); *p != nullptr; ++p) + if (test (**p)) + r.push_back (*p); + } + + // Next try target types derived from any of the base types (or file if + // there are no base types). 
+ // + const target_type_map& ttm (bs.root_scope ()->root_extra->target_types); + + for (auto i (ttm.type_begin ()), e (ttm.type_end ()); i != e; ++i) + { + const target_type& dt (i->second); + + if (tts != nullptr) + { + for (const target_type* const* p (tts); *p != nullptr; ++p) + { + const target_type& bt (**p); + + if (dt.is_a (bt)) + { + if (dt != bt && test (dt)) + r.push_back (&dt); + + break; + } + } + } + else + { + // Anything file-derived but not the file itself. + // + if (dt.is_a () && dt != file::static_type && test (dt)) + r.push_back (&dt); + } + } + + return r; + } + + void dyndep_rule:: + append_prefix (tracer& trace, prefix_map& m, const target& t, dir_path d) + { + // If the target directory is a sub-directory of the include directory, + // then the prefix is the difference between the two. Otherwise, leave it + // empty. + // + // The idea here is to make this "canonical" setup work auto-magically + // (using C/C++ #include's as an example): + // + // 1. We include all headers with a prefix, e.g., . + // + // 2. The library target is in the foo/ sub-directory, e.g., /tmp/foo/. + // + // 3. The poptions variable contains -I/tmp. + // + dir_path p (t.dir.sub (d) ? t.dir.leaf (d) : dir_path ()); + + // We use the target's directory as out_base but that doesn't work well + // for targets that are stashed in subdirectories. So as a heuristics we + // are going to also enter the outer directories of the original prefix. + // It is, however, possible, that another directory after this one will + // produce one of these outer prefixes as its original prefix in which + // case we should override it. + // + // So we are going to assign the original prefix priority value 0 + // (highest) and then increment it for each outer prefix. 
+ // + auto enter = [&trace, &m] (dir_path p, dir_path d, size_t prio) + { + auto j (m.lower_bound (p)), e (m.end ()); + + if (j != e && j->first != p) + j = e; + + if (j == m.end ()) + { + if (verb >= 4) + trace << "new mapping for prefix '" << p << "'\n" + << " new mapping to " << d << " priority " << prio; + + m.emplace (move (p), prefix_value {move (d), prio}); + } + else if (p.empty ()) + { + // For prefixless we keep all the entries since for them we have an + // extra check (target must be explicitly spelled out in a buildfile). + // + if (verb >= 4) + trace << "additional mapping for prefix '" << p << "'\n" + << " new mapping to " << d << " priority " << prio; + + // Find the position where to insert according to the priority. + // For equal priorities we use the insertion order. + // + do + { + if (j->second.priority > prio) + break; + } + while (++j != e && j->first == p); + + m.emplace_hint (j, move (p), prefix_value {move (d), prio}); + } + else + { + prefix_value& v (j->second); + + // We used to reject duplicates but it seems this can be reasonably + // expected to work according to the order of, say, -I options. + // + // Seeing that we normally have more "specific" -I paths first, (so + // that we don't pick up installed headers, etc), we ignore it. + // + if (v.directory == d) + { + if (v.priority > prio) + v.priority = prio; + } + else if (v.priority <= prio) + { + if (verb >= 4) + trace << "ignoring mapping for prefix '" << p << "'\n" + << " existing mapping to " << v.directory + << " priority " << v.priority << '\n' + << " another mapping to " << d << " priority " << prio; + } + else + { + if (verb >= 4) + trace << "overriding mapping for prefix '" << p << "'\n" + << " existing mapping to " << v.directory + << " priority " << v.priority << '\n' + << " new mapping to " << d << " priority " << prio; + + v.directory = move (d); + v.priority = prio; + } + } + }; + + // Enter all outer prefixes, including prefixless. 
+ // + // The prefixless part is fuzzy but seems to be doing the right thing + // ignoring/overriding-wise, at least in cases where one of the competing + // include search paths is a subdirectory of another. + // + for (size_t prio (0);; ++prio) + { + bool e (p.empty ()); + enter ((e ? move (p) : p), (e ? move (d) : d), prio); + if (e) + break; + p = p.directory (); + } + } + + bool dyndep_rule::srcout_builder:: + next (dir_path&& d) + { + // Ignore any paths containing '.', '..' components. Allow any directory + // separators though (think -I$src_root/foo on Windows). + // + if (d.absolute () && d.normalized (false)) + { + // If we have a candidate out_base, see if this is its src_base. + // + if (prev_ != nullptr) + { + const dir_path& bp (prev_->src_path ()); + + if (d.sub (bp)) + { + if (diff_.empty () || d.leaf (bp) == diff_) + { + // We've got a pair. + // + map_.emplace (move (d), prev_->out_path () / diff_); + prev_ = nullptr; // Taken. + return true; + } + } + + // Not a pair. Fall through to consider as out_base. + // + prev_ = nullptr; + } + + // See if this path is inside a project with an out-of-tree build and is + // in the out directory tree. + // + const scope& bs (ctx_.scopes.find_out (d)); + if (bs.root_scope () != nullptr) + { + if (!bs.out_eq_src ()) + { + const dir_path& bp (bs.out_path ()); + + bool e; + if ((e = (d == bp)) || d.sub (bp)) + { + prev_ = &bs; + if (e) + diff_.clear (); + else + diff_ = d.leaf (bp); + } + } + } + } + else + prev_ = nullptr; + + return false; + } + + pair dyndep_rule:: + enter_file (tracer& trace, const char* what, + action a, const scope& bs, target& t, + path&& f, bool cache, bool norm, + const function& map_extension, + const target_type& fallback, + const function& get_pfx_map, + const srcout_map& so_map) + { + // Find or maybe insert the target. The directory is only moved from if + // insert is true. Note that it must be normalized. 
+ // + auto find = [&trace, what, &t, + &map_extension, &fallback] (dir_path&& d, + path&& f, + bool insert) -> const file* + { + // Split the file into its name part and extension. Here we can assume + // the name part is a valid filesystem name. + // + // Note that if the file has no extension, we record an empty extension + // rather than NULL (which would signify that the default extension + // should be added). + // + string e (f.extension ()); + string n (move (f).string ()); + + if (!e.empty ()) + n.resize (n.size () - e.size () - 1); // One for the dot. + + // See if this directory is part of any project and if so determine + // the target type. + // + // While at it also determine if this target is from the src or out + // tree of said project. + // + dir_path out; + + // It's possible the extension-to-target type mapping is ambiguous (for + // example, because both C and X-language headers use the same .h + // extension). In this case we will first try to find one that matches + // an explicit target (similar logic to when insert is false). + // + small_vector tts; + + // Note that the path can be in out or src directory and the latter + // can be associated with multiple scopes. So strictly speaking we + // need to pick one that is "associated" with us. But that is still a + // TODO (see scope_map::find() for details) and so for now we just + // pick the first one (it's highly unlikely the source file extension + // mapping will differ based on the configuration). + // + { + const scope& bs (**t.ctx.scopes.find (d).first); + if (const scope* rs = bs.root_scope ()) + { + if (map_extension != nullptr) + tts = map_extension (bs, n, e); + + if (!bs.out_eq_src () && d.sub (bs.src_path ())) + out = out_src (d, *rs); + } + } + + // If it is outside any project, or the project doesn't have such an + // extension, use the fallback target type. 
+ // + if (tts.empty ()) + { + // If the project doesn't "know" this extension then we can't possibly + // find an explicit target of this type. + // + if (!insert) + { + l6 ([&]{trace << "unknown " << what << ' ' << n << " extension '" + << e << "'";}); + return nullptr; + } + + tts.push_back (&fallback); + } + + // Find or insert target. + // + // Note that in case of the target type ambiguity we first try to find + // an explicit target that resolves this ambiguity. + // + const target* r (nullptr); + + if (!insert || tts.size () > 1) + { + // Note that we skip any target type-specific searches (like for an + // existing file) and go straight for the target object since we + // need to find the target explicitly spelled out. + // + // Also, it doesn't feel like we should be able to resolve an + // absolute path with a spelled-out extension to multiple targets. + // + for (const target_type* tt: tts) + { + if ((r = t.ctx.targets.find (*tt, d, out, n, e, trace)) != nullptr) + break; + else + l6 ([&]{trace << "no targe with target type " << tt->name;}); + } + + // Note: we can't do this because of the in-source builds where there + // won't be explicit targets for non-generated files. + // + // This should be harmless, however, since in our world generated file + // are spelled-out as explicit targets. And if not, we will still get + // an error, just a bit less specific. 
+ // +#if 0 + if (r == nullptr && insert) + { + f = d / n; + if (!e.empty ()) + { + f += '.'; + f += e; + } + + diag_record dr (fail); + dr << "ambiguous mapping of " << what ' ' << f << " to target type"; + for (const target_type* tt: tts) + dr << info << "could be " << tt->name << "{}"; + dr << info << "spell-out its target to resolve this ambiguity"; + } +#endif + } + + // @@ OPT: move d, out, n + // + if (r == nullptr && insert) + r = &search (t, *tts[0], d, out, n, &e, nullptr); + + return static_cast (r); + }; + + // If it's not absolute then it either does not (yet) exist or is a + // relative ""-include (see init_args() for details). Reduce the second + // case to absolute. + // + // Note: we now always use absolute path to the translation unit so this + // no longer applies. But let's keep it for posterity. + // +#if 0 + if (f.relative () && rels.relative ()) + { + // If the relative source path has a directory component, make sure it + // matches since ""-include will always start with that (none of the + // compilers we support try to normalize this path). Failed that we may + // end up searching for a generated header in a random (working) + // directory. + // + const string& fs (f.string ()); + const string& ss (rels.string ()); + + size_t p (path::traits::rfind_separator (ss)); + + if (p == string::npos || // No directory. + (fs.size () > p + 1 && + path::traits::compare (fs.c_str (), p, ss.c_str (), p) == 0)) + { + path t (work / f); // The rels path is relative to work. + + if (exists (t)) + f = move (t); + } + } +#endif + + const file* pt (nullptr); + bool remapped (false); + + // If still relative then it does not exist. + // + if (f.relative ()) + { + // This is probably as often an error as an auto-generated file, so + // trace at level 4. + // + l4 ([&]{trace << "non-existent " << what << " '" << f << "'";}); + + f.normalize (); + + // The relative path might still contain '..' (e.g., ../foo.hxx; + // presumably ""-include'ed). 
We don't attempt to support auto- + // generated files with such inclusion styles. + // + if (get_pfx_map != nullptr && f.normalized ()) + { + const prefix_map& pfx_map (get_pfx_map (a, bs, t)); + + // First try the whole file. Then just the directory. + // + // @@ Has to be a separate map since the prefix can be the same as + // the file name. + // + // auto i (pfx_map->find (f)); + + // Find the most qualified prefix of which we are a sub-path. + // + if (!pfx_map.empty ()) + { + dir_path d (f.directory ()); + auto p (pfx_map.sup_range (d)); + + if (p.first != p.second) + { + // Note that we can only have multiple entries for the + // prefixless mapping. + // + dir_path pd; // Reuse. + for (auto i (p.first); i != p.second; ++i) + { + // Note: value in pfx_map is not necessarily canonical. + // + pd = i->second.directory; + pd.canonicalize (); + + l4 ([&]{trace << "try prefix '" << d << "' mapped to " << pd;}); + + // If this is a prefixless mapping, then only use it if we can + // resolve it to an existing target (i.e., it is explicitly + // spelled out in a buildfile). @@ Hm, I wonder why, it's not + // like we can generate any file without an explicit target. + // Maybe for diagnostics (i.e., we will actually try to build + // something there instead of just saying no mapping). + // + pt = find (pd / d, f.leaf (), !i->first.empty ()); + if (pt != nullptr) + { + f = pd / f; + l4 ([&]{trace << "mapped as auto-generated " << f;}); + break; + } + else + l4 ([&]{trace << "no explicit target in " << pd;}); + } + } + else + l4 ([&]{trace << "no prefix map entry for '" << d << "'";}); + } + else + l4 ([&]{trace << "prefix map is empty";}); + } + } + else + { + // Normalize the path unless it comes from the depdb, in which case + // we've already done that (normally). This is also where we handle + // src-out remap (again, not needed if cached). 
+ // + if (!cache || norm) + normalize_external (f, what); + + if (!cache) + { + if (!so_map.empty ()) + { + // Find the most qualified prefix of which we are a sub-path. + // + auto i (so_map.find_sup (f)); + if (i != so_map.end ()) + { + // Ok, there is an out tree for this file. Remap to a path from + // the out tree and see if there is a target for it. Note that the + // value in so_map is not necessarily canonical. + // + dir_path d (i->second); + d /= f.leaf (i->first).directory (); + d.canonicalize (); + + pt = find (move (d), f.leaf (), false); // d is not moved from. + + if (pt != nullptr) + { + path p (d / f.leaf ()); + l4 ([&]{trace << "remapping " << f << " to " << p;}); + f = move (p); + remapped = true; + } + } + } + } + + if (pt == nullptr) + { + l6 ([&]{trace << "entering " << f;}); + pt = find (f.directory (), f.leaf (), true); + } + } + + return make_pair (pt, remapped); + } +} diff --git a/libbuild2/dyndep.hxx b/libbuild2/dyndep.hxx new file mode 100644 index 0000000..3ba0c09 --- /dev/null +++ b/libbuild2/dyndep.hxx @@ -0,0 +1,168 @@ +// file : libbuild2/dyndep.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_DYNDEP_HXX +#define LIBBUILD2_DYNDEP_HXX + +#include +#include +#include + +#include +#include + +#include + +// Additional functionality that is normally only useful for implementing +// rules with dynamic dependencies. +// +namespace build2 +{ + class LIBBUILD2_SYMEXPORT dyndep_rule + { + public: + // Update the target during the match phase. Return true if it has changed + // or if the passed timestamp is not timestamp_unknown and is older than + // the target. + // + static bool + update (tracer&, action, const target&, timestamp); + + // Update and add to the list of prerequisite targets a prerequisite file + // target. + // + // Return the indication of whether it has changed or, if the passed + // timestamp is not timestamp_unknown, is older than this timestamp. 
If + // the prerequisite target neither exists nor can be generated (no rule), + // then issue diagnostics and fail if the fail argument is true and return + // nullopt otherwise. + // + static optional + inject_file (tracer&, const char* what, + action, target&, + const file& prerequiste, + timestamp, + bool fail); + + // Reverse-lookup target type(s) from file name/extension. + // + // If the list of base target types is specified, then only these types + // and those derived from them are considered. Otherwise, any file-based + // type is considered but not the file type itself. + // + static small_vector + map_extension (const scope& base, + const string& name, const string& ext, + const target_type* const* bases); + + // Mapping of inclusion prefixes (e.g., foo in #include <foo/bar>) for + // auto-generated files to inclusion search paths (e.g. -I) where they + // will be generated. + // + // We are using a prefix map of directories (dir_path_map) instead of just + // a map in order to also cover sub-paths (e.g., #include <foo/more/bar> + // if we continue with the example). Specifically, we need to make sure we + // don't treat foobar as a sub-directory of foo. + // + // The priority is used to decide who should override whom. Lesser values + // are considered higher priority. Note that we allow multiple prefixless + // mappings (where priority is used to determine the order). For details, + // see append_prefix(). + // + // Note that the keys should be normalized. + // + struct prefix_value + { + dir_path directory; + size_t priority; + }; + + using prefix_map = dir_path_multimap; + + // Add the specified absolute and normalized inclusion search path into + // the prefix map of the specified target. + // + static void + append_prefix (tracer&, prefix_map&, const target&, dir_path); + + // Mapping of src inclusion search paths to the corresponding out paths + // for auto-generated files re-mapping. See cc::extract_headers() for + // background.
+ // + // Note that we use path_map instead of dir_path_map to allow searching + // using path (file path). + // + using srcout_map = path_map; + + class LIBBUILD2_SYMEXPORT srcout_builder + { + public: + srcout_builder (context& ctx, srcout_map& map): ctx_ (ctx), map_ (map) {} + + // Process next -I path. Return true if an entry was added to the map, + // in which case the passed path is moved from. + // + bool + next (dir_path&&); + + // Skip the previously cached first half. + // + void + skip () + { + prev_ = nullptr; + } + + private: + context& ctx_; + srcout_map& map_; + + // Previous -I's innermost scope if out_base plus the difference between + // the scope path and the -I path (normally empty). + // + const scope* prev_ = nullptr; + dir_path diff_; + }; + + // Enter a prerequisite file as a target. If the path is relative, then + // assume this a non-existent generated file. + // + // Depending on the cache flag, the path is assumed to either have come + // from the depdb cache or from the compiler run. In the former case + // assume the path is already normalized unless the normalize flag is + // true. + // + // Return the file target and an indication of whether it was remapped or + // NULL if the file does not exist and cannot be generated. In the latter + // case the passed file path is guaranteed to still be valid but might + // have been adjusted (e.g., normalized, etc). + // + // The map_extension function is used to reverse-map a file extension to + // the target type. The fallback target type is used if it's NULL or + // didn't return anything but only in situations where we are sure the + // file is (or should be there; see the implementation for details). + // + // The prefix map function is only called if this is a non-existent + // generated file (so it can be initialized lazily). If it's NULL, then + // generated files will not be supported. 
The srcout map is only consulted + // if cache is false (so its initialization can be delayed until the call + // with cache=false). + // + using map_extension_func = small_vector ( + const scope& base, const string& name, const string& ext); + + using prefix_map_func = const prefix_map& ( + action, const scope& base, const target&); + + static pair + enter_file (tracer&, const char* what, + action, const scope& base, target&, + path&& prerequisite, bool cache, bool norm, + const function&, + const target_type& fallback, + const function&, + const srcout_map&); + }; +} + +#endif // LIBBUILD2_DYNDEP_HXX diff --git a/libbuild2/filesystem.cxx b/libbuild2/filesystem.cxx index fbe145c..2e3309d 100644 --- a/libbuild2/filesystem.cxx +++ b/libbuild2/filesystem.cxx @@ -323,4 +323,59 @@ namespace build2 fail << "unable to set path " << p << " permissions: " << e; } } + + void + normalize_external (path& f, const char* what) + { + // The main motivating case for this logic are C/C++ headers. + // + // Interestingly, on most platforms and with most compilers (Clang on + // Linux being a notable exception) most system/compiler headers are + // already normalized. + // + path_abnormality a (f.abnormalities ()); + if (a != path_abnormality::none) + { + // While we can reasonably expect this path to exist, things do go south + // from time to time (like compiling under wine with file wlantypes.h + // included as WlanTypes.h). + // + try + { + // If we have any parent components, then we have to verify the + // normalized path matches realized. + // + path r; + if ((a & path_abnormality::parent) == path_abnormality::parent) + { + r = f; + r.realize (); + } + + try + { + f.normalize (); + + // Note that we might still need to resolve symlinks in the + // normalized path. + // + if (!r.empty () && f != r && path (f).realize () != r) + f = move (r); + } + catch (const invalid_path&) + { + assert (!r.empty ()); // Shouldn't have failed if no `..`. + f = move (r); // Fallback to realize.
+ } + } + catch (const invalid_path&) + { + fail << "invalid " << what << " path '" << f.string () << "'"; + } + catch (const system_error& e) + { + fail << "invalid " << what << " path '" << f.string () << "': " << e; + } + } + } } diff --git a/libbuild2/filesystem.hxx b/libbuild2/filesystem.hxx index ee7ba9a..565e832 100644 --- a/libbuild2/filesystem.hxx +++ b/libbuild2/filesystem.hxx @@ -189,6 +189,35 @@ namespace build2 LIBBUILD2_SYMEXPORT void path_perms (const path&, permissions); + + // Normalize an absolute path to an existing file that may reside outside of + // any project and could involve funny filesystem business (e.g., relative + // directory symlinks). For example, a C/C++ header path returned by a + // compiler which could be a system header. + // + // We used to just normalize such a path but that could result in an invalid + // path (e.g., for some system/compiler headers on CentOS 7 with Clang 3.4) + // because of the symlinks (if a directory component is a symlink, then any + // following `..` are resolved relative to the target; see path::normalize() + // for background). + // + // Initially, to fix this, we realized (i.e., realpath(3)) it instead. But + // that turned out also not to be quite right since now we have all the + // symlinks resolved: conceptually it feels correct to keep the original + // header names since that's how the user chose to arrange things and + // practically this is how compilers see/report them (e.g., the GCC module + // mapper). + // + // So now we have a pretty elaborate scheme where we try to use the + // normalized path if possible and fallback to realized. Normalized paths + // will work for situations where `..` does not cross symlink boundaries, + // which is the sane case. And for the insane case we only really care + // about out-of-project files (i.e., system/compiler headers). In other + // words, if you have the insane case inside your project, then you are on + // your own. 
+ // + LIBBUILD2_SYMEXPORT void + normalize_external (path&, const char* what); } #include diff --git a/libbuild2/make-parser.cxx b/libbuild2/make-parser.cxx new file mode 100644 index 0000000..d076a0a --- /dev/null +++ b/libbuild2/make-parser.cxx @@ -0,0 +1,137 @@ +// file : libbuild2/make-parser.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include + +namespace build2 +{ + auto make_parser:: + next (const string& l, + size_t& p, + const location& ll, + bool strict) -> pair + { + assert (state != end); + + pair r ( + next (l, p, !strict ? state == prereqs : optional ())); + + type t (state == prereqs ? type::prereq : type::target); + + // Deal with the end. + // + if (r.second) + { + if (state == begin && r.first.empty ()) + ; // Skip leading blank line. + else + { + if (state != prereqs) + fail (ll) << "end of make dependency declaration before ':'"; + + state = end; + } + } + // Deal with the first target. + // + else if (state == begin && !r.first.empty ()) + state = targets; + + // Deal with `:`. + // + if (p != l.size () && l[p] == ':') + { + switch (state) + { + case begin: fail (ll) << "':' before make target"; break; + case targets: state = prereqs; break; + case prereqs: fail (ll) << "':' after make prerequisite"; break; + case end: break; + } + + if (++p == l.size ()) + state = end; // Not a mere optimization: the caller will get next line. + } + + return pair (t, move (r.first)); + } + + pair make_parser:: + next (const string& l, size_t& p, optional prereq) + { + size_t n (l.size ()); + + // Skip leading spaces. + // + for (; p != n && l[p] == ' '; p++) ; + + // Lines containing multiple targets/prerequisites are customarily 80 + // characters max. + // + string r; + r.reserve (n - p); + + // Scan the next target/prerequisite while watching out for escape + // sequences. + // + // @@ Can't we do better for the (common) case where nothing is escaped? + // + for (char c, q (prereq && *prereq ? 
'\0' : ':'); + p != n && (c = l[p]) != ' ' && c != q; ) + { + // If we have another character, then handle the escapes. + // + if (++p != n) + { + if (c == '\\') + { + // This may or may not be an escape sequence depending on whether + // what follows is "escapable". + // + switch (c = l[p]) + { + case '\\': + case ' ': + case ':': ++p; break; + default: c = '\\'; // Restore. + } + } + else if (c == '$') + { + // Got to be another (escaped) '$'. + // + if (l[p] == '$') + ++p; + } + } + // Note that the newline escape is not necessarily separated with space. + // + else if (c == '\\') + { + --p; + break; + } + + r += c; + } + + // Skip trailing spaces. + // + for (; p != n && l[p] == ' '; p++) ; + + // Skip final '\' and determine if this is the end. + // + bool e (false); + if (p == n - 1) + { + if (l[p] == '\\') + p++; + } + else if (p == n) + e = true; + + return pair (move (r), e); + } +} diff --git a/libbuild2/make-parser.hxx b/libbuild2/make-parser.hxx new file mode 100644 index 0000000..fac2215 --- /dev/null +++ b/libbuild2/make-parser.hxx @@ -0,0 +1,85 @@ +// file : libbuild2/make-parser.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUILD2_MAKE_PARSER_HXX +#define LIBBUILD2_MAKE_PARSER_HXX + +#include +#include + +#include + +namespace build2 +{ + // Make dependency declaration parser. + // + // The format is line-based (but with potential line continuations) so we + // parse one line at a time. This allows the caller to bail out early (for + // example, on encountering a non-existent generated file). + // + class LIBBUILD2_SYMEXPORT make_parser + { + public: + enum {begin, targets, prereqs, end} state = begin; + + // Parse next target/prerequisite on a line starting from the specified + // position. Update the position to point to the start of the following + // target/prerequisite or l.size() if there is nothing left on this + // line. 
May return an empty string for a valid if unlikely dependency + // declaration (see below) or if passing leading blank lines (both of + // which should normally be just skipped). Issue diagnostics and throw + // failed if the declaration is invalid. + // + // If strict is false, then allow unescaped `:` in prerequisites. + // + // Note that the (p != l.size) should be in the do-while rather than in a + // while loop. In other words, except for the leading blank lines, the + // parser needs to see the blank line to correctly identify the end of the + // declaration. See make-parser.test.cxx for a recommended usage. + // + // To parse more than one declaration, reset the state to begin after + // reaching end. + // + enum class type {target, prereq}; + + pair + next (const string&, size_t&, const location&, bool strict); + + // Lower-level stateless API. + // + public: + // Parse next target/prerequisite on a line starting from the specified + // position. Return the target/prerequisite as well as an indication of + // whether the end of the dependency declaration was reached. Update the + // position to point to the start of the following target/prerequisite, + // `:`, or l.size() if there is nothing left on this line. + // + // Note that some broken tools (notably MinGW GCC) do not escape `:` + // properly. To tolerate such cases the caller may specify that what's + // being parsed is the prerequisite list in which case unescaped `:` will + // be treated literally. + // + // Note also that this function may return an empty string (with + // end=false) for a valid if unlikely dependency declaration, for example + // (using | to represent backslash): + // + // foo:| + // | + // bar + // + // It would also return an empty string (with end=true) if passed an + // empty or whitespace-only line. + // + // Note also that in the make language line continuations introduce a + // whitespace rather than just being removed.
For example, the following + // declaration has two prerequisites: + // + // foo: bar| + // baz + // + static pair + next (const string&, size_t&, optional prereq = nullopt); + }; +} + +#endif // LIBBUILD2_MAKE_PARSER_HXX diff --git a/libbuild2/make-parser.test.cxx b/libbuild2/make-parser.test.cxx new file mode 100644 index 0000000..189407a --- /dev/null +++ b/libbuild2/make-parser.test.cxx @@ -0,0 +1,90 @@ +// file : libbuild2/make-parser.test.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include + +#include +#include + +#undef NDEBUG +#include + +using namespace std; + +namespace build2 +{ + int + main (int, char* argv[]) + { + bool strict (false); + + // Fake build system driver, default verbosity. + // + init_diag (1); + init (nullptr, argv[0]); + + path_name in (""); + + try + { + cin.exceptions (istream::badbit); + + using make_state = make_parser; + using make_type = make_parser::type; + + make_parser make; + + location ll (in, 1); + for (string l; !eof (getline (cin, l)); ++ll.line) + { + if (make.state == make_state::end) + { + cout << endl; + make.state = make_state::begin; + } + + // Skip leading blank lines to reduce output noise. + // + if (make.state == make_state::begin && l.empty ()) + continue; + + size_t pos (0); + do + { + pair r (make.next (l, pos, ll, strict)); + + cout << (r.first == make_type::target ? 
'T' : 'P'); + + if (!r.second.empty ()) + cout << ' ' << r.second; + + cout << endl; + } + while (pos != l.size ()); + } + + if (make.state != make_state::end && make.state != make_state::begin) + fail (ll) << "incomplete make dependency declaration"; + } + catch (const io_error& e) + { + cerr << "unable to read stdin: " << e << endl; + return 1; + } + catch (const failed&) + { + return 1; + } + + return 0; + } +} + +int +main (int argc, char* argv[]) +{ + return build2::main (argc, argv); +} diff --git a/libbuild2/make-parser.test.testscript b/libbuild2/make-parser.test.testscript new file mode 100644 index 0000000..6db00eb --- /dev/null +++ b/libbuild2/make-parser.test.testscript @@ -0,0 +1,98 @@ +# file : libbuild2/make-parser.test.testscript +# license : MIT; see accompanying LICENSE file + +: valid +: +$* <>EOO + foo: + + foo: bar + + foo: bar baz + + foo: bar \ + baz + + foo: bar\ + baz + + foo:\ + bar baz\ + fox + + foo: bar \ + \ + baz + + foo: bar\ + + foo bar: baz + + foo \ + bar: baz + + foo \ + bar \ + : baz + + \ + foo: bar + EOI + T foo + + T foo + P bar + + T foo + P bar + P baz + + T foo + P bar + P baz + + T foo + P bar + P baz + + T foo + P + P bar + P baz + P fox + + T foo + P bar + P + P baz + + T foo + P bar + P + + T foo + T bar + P baz + + T foo + T bar + P baz + + T foo + T bar + T + P baz + + T + T foo + P bar + EOO + +: lax +: +$* <>EOO + foo: c:\tmp\bar + EOI + T foo + P c:\tmp\bar + EOO diff --git a/libbuild2/script/parser.cxx b/libbuild2/script/parser.cxx index fffe7bb..7722002 100644 --- a/libbuild2/script/parser.cxx +++ b/libbuild2/script/parser.cxx @@ -1313,7 +1313,7 @@ namespace build2 // Note that an option name and value can belong to different name // chunks. That's why we parse the env builtin arguments in the chunking // mode into the argument/location pair list up to the '--' separator - // and parse this list into the variable sets/unsets afterwords. + // and parse this list into the variable sets/unsets afterwards. 
// // Align the size with environment_vars (double because of -u // which is two arguments). diff --git a/libbuild2/script/run.cxx b/libbuild2/script/run.cxx index f3b5cad..12078c4 100644 --- a/libbuild2/script/run.cxx +++ b/libbuild2/script/run.cxx @@ -944,6 +944,83 @@ namespace build2 : path (c.program.recall_string ()); } + // Read out the stream content into a string. Throw io_error on the + // underlying OS error. + // + // If the execution deadline is specified, then turn the stream into the + // non-blocking mode reading its content in chunks and with a single + // operation otherwise. If the specified deadline is reached while + // reading the stream, then bail out for the successful deadline and + // fail otherwise. Note that in the former case the result will be + // incomplete, but we leave it to the caller to handle that. + // + // Note that on Windows we can only turn pipe file descriptors into the + // non-blocking mode. Thus, we have no choice but to read from + // descriptors of other types synchronously there. That implies that we + // can potentially block indefinitely reading a file and missing the + // deadline on Windows. Note though, that the user can normally rewrite + // the command, for example, `set foo <<& dl, + const command& deadline_cmd, + const location& ll) + { + string r; + ifdstream cin; + +#ifndef _WIN32 + if (dl) +#else + if (dl && pipe) +#endif + { + fdselect_set fds {in.get ()}; + cin.open (move (in), fdstream_mode::non_blocking); + + const timestamp& dlt (dl->value); + + for (char buf[4096];; ) + { + timestamp now (system_clock::now ()); + + if (dlt <= now || ifdselect (fds, dlt - now) == 0) + { + if (!dl->success) + fail (ll) << cmd_path (deadline_cmd) + << " terminated: execution timeout expired"; + else + break; + } + + streamsize n (cin.readsome (buf, sizeof (buf))); + + // Bail out if eos is reached. 
+ // + if (n == 0) + break; + + r.append (buf, n); + } + } + else + { + cin.open (move (in)); + r = cin.read_text (); + } + + cin.close (); + + return r; + } + // The set pseudo-builtin: set variable from the stdin input. // // set [-e|--exact] [(-n|--newline)|(-w|--whitespace)] [] @@ -952,11 +1029,7 @@ namespace build2 set_builtin (environment& env, const strings& args, auto_fd in, -#ifndef _WIN32 - bool, -#else bool pipe, -#endif const optional& dl, const command& deadline_cmd, const location& ll) @@ -988,70 +1061,9 @@ namespace build2 fail (ll) << "set: empty variable name"; // Read out the stream content into a string while keeping an eye on - // the deadline. Then parse it according to the split mode. + // the deadline. // - string s; - { - ifdstream cin; - - // If the execution deadline is specified, then turn the stream into - // the non-blocking mode reading its content in chunks and with a - // single operation otherwise. If the specified deadline is reached - // while reading the stream, then bail out for the successful - // deadline and fail otherwise. Note that in the former case the - // variable value will be incomplete, but we leave it to the caller - // to handle that. - // - // Note that on Windows we can only turn pipe file descriptors into - // the non-blocking mode. Thus, we have no choice but to read from - // descriptors of other types synchronously there. That implies that - // we can potentially block indefinitely reading a file and missing - // the deadline on Windows. Note though, that the user can always - // rewrite `set foo <<value); - - for (char buf[4096];; ) - { - timestamp now (system_clock::now ()); - - if (dlt <= now || ifdselect (fds, dlt - now) == 0) - { - if (!dl->success) - fail (ll) << cmd_path (deadline_cmd) - << " terminated: execution timeout expired"; - else - break; - } - - streamsize n (cin.readsome (buf, sizeof (buf))); - - // Bail out if eos is reached. 
- // - if (n == 0) - break; - - s.append (buf, n); - } - } - else - { - cin.open (move (in)); - s = cin.read_text (); - } - - cin.close (); - } + string s (read (move (in), pipe, dl, deadline_cmd, ll)); // Parse the stream content into the variable value. // @@ -1137,7 +1149,7 @@ namespace build2 } catch (const io_error& e) { - fail (ll) << "set: " << e; + fail (ll) << "set: unable to read from stdin: " << e; } catch (const cli::exception& e) { @@ -1202,14 +1214,42 @@ namespace build2 auto_fd ifd, size_t ci, size_t li, const location& ll, bool diag, + string* output, optional dl = nullopt, const command* dl_cmd = nullptr, // env -t pipe_command* prev_cmd = nullptr) { tracer trace ("script::run_pipe"); - if (bc == ec) // End of the pipeline. + // At the end of the pipeline read out its stdout, if requested. + // + if (bc == ec) + { + if (output != nullptr) + { + // The pipeline can't be empty. + // + assert (ifd != nullfd && prev_cmd != nullptr); + + const command& c (prev_cmd->cmd); + + try + { + *output = read (move (ifd), + true /* pipe */, + dl, + dl_cmd != nullptr ? *dl_cmd : c, + ll); + } + catch (const io_error& e) + { + fail (ll) << "io error reading " << cmd_path (c) << " output: " + << e; + } + } + return true; + } // The overall plan is to run the first command in the pipe, reading its // input from the file descriptor passed (or, for the first command, @@ -1261,6 +1301,11 @@ namespace build2 command_pipe::const_iterator nc (bc + 1); bool last (nc == ec); + // Make sure that stdout is not redirected if meant to be read. + // + if (last && output != nullptr && c.out) + fail (ll) << "stdout cannot be redirected"; + // True if the process path is not pre-searched and the program path // still needs to be resolved. // @@ -1272,7 +1317,7 @@ namespace build2 const redirect& in ((c.in ? *c.in : env.in).effective ()); - const redirect* out (!last + const redirect* out (!last || output != nullptr ? nullptr // stdout is piped. : &(c.out ? 
*c.out : env.out).effective ()); @@ -1340,6 +1385,9 @@ namespace build2 if (c.out) fail (ll) << program << " builtin stdout cannot be redirected"; + if (output != nullptr) + fail (ll) << program << " builtin stdout cannot be read"; + if (c.err) fail (ll) << program << " builtin stderr cannot be redirected"; @@ -1529,6 +1577,9 @@ namespace build2 if (c.out) fail (ll) << "set builtin stdout cannot be redirected"; + if (output != nullptr) + fail (ll) << "set builtin stdout cannot be read"; + if (c.err) fail (ll) << "set builtin stderr cannot be redirected"; @@ -1661,7 +1712,7 @@ namespace build2 // script failures investigation and, for example, for validation // "tightening". // - if (last) + if (last && out != nullptr) ofd.out = open (*out, 1, osp); else { @@ -1690,7 +1741,7 @@ namespace build2 fail (ll) << "stdout and stderr redirected to each other"; auto_fd& self (mo ? ofd.out : efd); - auto_fd& other (mo ? efd : ofd.out); + auto_fd& other (mo ? efd : ofd.out); try { @@ -1704,9 +1755,9 @@ namespace build2 } } - // All descriptors should be open to the date. + // By now all descriptors should be open. // - assert (ofd.out.get () != -1 && efd.get () != -1); + assert (ofd.out != nullfd && efd != nullfd); // Wait for a process/builtin to complete until the deadline is reached // and return the underlying wait function result (optional). @@ -1756,7 +1807,7 @@ namespace build2 // is exiting on Windows, etc) then just ignore this, postponing // the potential failure till the kill() call. 
// - l5 ([&]{trace (c->loc) <<"unable to terminate " << prog (c) + l5 ([&]{trace (c->loc) << "unable to terminate " << prog (c) << ": " << e;}); } @@ -2123,6 +2174,7 @@ namespace build2 nc, ec, move (ofd.in), ci + 1, li, ll, diag, + output, dl, dl_cmd, &pc); @@ -2249,6 +2301,7 @@ namespace build2 nc, ec, move (ofd.in), ci + 1, li, ll, diag, + output, dl, dl_cmd, &pc); @@ -2376,7 +2429,7 @@ namespace build2 if (success) success = check_output (pr, esp, isp, err, ll, env, diag, "stderr") && - (!last || + (out == nullptr || check_output (pr, osp, isp, *out, ll, env, diag, "stdout")); return success; @@ -2386,7 +2439,8 @@ namespace build2 run_expr (environment& env, const command_expr& expr, size_t li, const location& ll, - bool diag) + bool diag, + string* output) { // Commands are numbered sequentially throughout the expression // starting with 1. Number 0 means the command is a single one. @@ -2424,10 +2478,15 @@ namespace build2 // with false. // if (!((or_op && r) || (!or_op && !r))) + { + assert (!p.empty ()); + r = run_pipe (env, p.begin (), p.end (), auto_fd (), - ci, li, ll, print); + ci, li, ll, print, + output); + } ci += p.size (); } @@ -2438,24 +2497,26 @@ namespace build2 void run (environment& env, const command_expr& expr, - size_t li, const location& ll) + size_t li, const location& ll, + string* output) { // Note that we don't print the expression at any verbosity level // assuming that the caller does this, potentially providing some // additional information (command type, etc). // - if (!run_expr (env, expr, li, ll, true /* diag */)) + if (!run_expr (env, expr, li, ll, true /* diag */, output)) throw failed (); // Assume diagnostics is already printed. } bool run_if (environment& env, const command_expr& expr, - size_t li, const location& ll) + size_t li, const location& ll, + string* output) { // Note that we don't print the expression here (see above). 
// + return run_expr (env, expr, li, ll, false /* diag */, output); } void diff --git a/libbuild2/script/run.hxx b/libbuild2/script/run.hxx index 477dd88..8bc246c 100644 --- a/libbuild2/script/run.hxx +++ b/libbuild2/script/run.hxx @@ -38,11 +38,22 @@ namespace build2 // Location is the start position of this command line in the script. It // can be used in diagnostics. // + // Optionally, save the command output into the referenced variable. In + // this case assume that the expression contains a single pipeline. + // void - run (environment&, const command_expr&, size_t index, const location&); + run (environment&, + const command_expr&, + size_t index, + const location&, + string* output = nullptr); bool - run_if (environment&, const command_expr&, size_t, const location&); + run_if (environment&, + const command_expr&, + size_t index, + const location&, + string* output = nullptr); // Perform the registered special file cleanups in the direct order and // then the regular cleanups in the reverse order. diff --git a/libbuild2/script/script.hxx b/libbuild2/script/script.hxx index d162900..81bc13c 100644 --- a/libbuild2/script/script.hxx +++ b/libbuild2/script/script.hxx @@ -495,7 +495,8 @@ namespace build2 // Register a cleanup. If the cleanup is explicit, then override the // cleanup type if this path is already registered. Ignore implicit - // registration of a path outside root directory (see below). + // registration of a path outside sandbox directory, if specified (see + // above). // void clean (cleanup, bool implicit); -- cgit v1.1