diff options
Diffstat (limited to 'libbuild2/cc/compile-rule.cxx')
-rw-r--r-- | libbuild2/cc/compile-rule.cxx | 4449 |
1 files changed, 2718 insertions, 1731 deletions
diff --git a/libbuild2/cc/compile-rule.cxx b/libbuild2/cc/compile-rule.cxx index b96c39d..2e4775e 100644 --- a/libbuild2/cc/compile-rule.cxx +++ b/libbuild2/cc/compile-rule.cxx @@ -3,8 +3,11 @@ #include <libbuild2/cc/compile-rule.hxx> +#include <cerrno> #include <cstdlib> // exit() -#include <cstring> // strlen(), strchr() +#include <cstring> // strlen(), strchr(), strncmp() + +#include <libbutl/path-pattern.hxx> #include <libbuild2/file.hxx> #include <libbuild2/depdb.hxx> @@ -14,6 +17,7 @@ #include <libbuild2/algorithm.hxx> #include <libbuild2/filesystem.hxx> // mtime() #include <libbuild2/diagnostics.hxx> +#include <libbuild2/make-parser.hxx> #include <libbuild2/bin/target.hxx> @@ -172,14 +176,71 @@ namespace build2 if (s == "includes") return preprocessed::includes; if (s == "modules") return preprocessed::modules; if (s == "all") return preprocessed::all; - throw invalid_argument ("invalid preprocessed value '" + s + "'"); + throw invalid_argument ("invalid preprocessed value '" + s + '\''); + } + + // Return true if the compiler supports -isystem (GCC class) or + // /external:I (MSVC class). + // + static inline bool + isystem (const data& d) + { + switch (d.cclass) + { + case compiler_class::gcc: + { + return true; + } + case compiler_class::msvc: + { + if (d.cvariant.empty ()) + { + // While /external:I is available since 15.6, it required + // /experimental:external (and was rather buggy) until 16.10. + // + return d.cmaj > 19 || (d.cmaj == 19 && d.cmin >= 29); + } + else if (d.cvariant == "clang") + { + // clang-cl added support for /external:I (by translating it to + // -isystem) in version 13. + // + return d.cvmaj >= 13; + } + else + return false; + } + } + + return false; } + optional<path> compile_rule:: + find_system_header (const path& f) const + { + path p; // Reuse the buffer.
+ for (const dir_path& d: sys_hdr_dirs) + { + if (file_exists ((p = d, p /= f), + true /* follow_symlinks */, + true /* ignore_errors */)) + return p; + } + + return nullopt; + } + + // Note that we don't really need this for clean (where we only need + // unrefined unit type) so we could make this update-only. But let's keep + // it simple for now. Note that now we do need the source prerequisite + // type in clean to deal with Objective-X. + // struct compile_rule::match_data { - explicit - match_data (unit_type t, const prerequisite_member& s) - : type (t), src (s) {} + match_data (const compile_rule& r, + unit_type t, + const prerequisite_member& s) + : type (t), src (s), rule (r) {} unit_type type; preprocessed pp = preprocessed::none; @@ -188,54 +249,87 @@ namespace build2 bool touch = false; // Target needs to be touched. timestamp mt = timestamp_unknown; // Target timestamp. prerequisite_member src; - auto_rmfile psrc; // Preprocessed source, if any. + file_cache::entry psrc; // Preprocessed source, if any. path dd; // Dependency database path. size_t header_units = 0; // Number of imported header units. module_positions modules = {0, 0, 0}; // Positions of imported modules. + + const compile_rule& rule; + + target_state + operator() (action a, const target& t) + { + return rule.perform_update (a, t, *this); + } }; compile_rule:: - compile_rule (data&& d) + compile_rule (data&& d, const scope& rs) : common (move (d)), - rule_id (string (x) += ".compile 4") + rule_id (string (x) += ".compile 6") { - static_assert (sizeof (match_data) <= target::data_size, - "insufficient space"); + // Locate the header cache (see enter_header() for details). + // + { + string mn (string (x) + ".config"); + + header_cache_ = rs.find_module<config_module> (mn); // Must be there. 
+ + const scope* ws (rs.weak_scope ()); + if (ws != &rs) + { + const scope* s (&rs); + do + { + s = s->parent_scope ()->root_scope (); + + if (const auto* m = s->find_module<config_module> (mn)) + header_cache_ = m; + + } while (s != ws); + } + } } template <typename T> void compile_rule:: - append_sys_inc_options (T& args) const + append_sys_hdr_options (T& args) const { - assert (sys_inc_dirs_extra <= sys_inc_dirs.size ()); + assert (sys_hdr_dirs_mode + sys_hdr_dirs_extra <= sys_hdr_dirs.size ()); // Note that the mode options are added as part of cmode. // - auto b (sys_inc_dirs.begin () + sys_inc_dirs_mode); - auto m (sys_inc_dirs.begin () + sys_inc_dirs_extra); - auto e (sys_inc_dirs.end ()); + auto b (sys_hdr_dirs.begin () + sys_hdr_dirs_mode); + auto x (b + sys_hdr_dirs_extra); - // Note: starting from 15.6, MSVC gained /external:I option though it + // Add extras. + // + // Note: starting from 16.10, MSVC gained /external:I option though it // doesn't seem to affect the order, only "system-ness". // append_option_values ( args, - cclass == compiler_class::gcc ? "-idirafter" : - cclass == compiler_class::msvc ? "/I" : "-I", - m, e, + cclass == compiler_class::gcc ? "-isystem" : + cclass == compiler_class::msvc ? (isystem (*this) + ? "/external:I" + : "/I") : "-I", + b, x, [] (const dir_path& d) {return d.string ().c_str ();}); // For MSVC if we have no INCLUDE environment variable set, then we // add all of them. But we want extras to come first. Note also that // clang-cl takes care of this itself. // + // Note also that we don't use /external:I to have consistent semantics + // with when INCLUDE is set (there is separate /external:env for that). 
+ // if (ctype == compiler_type::msvc && cvariant != "clang") { if (!getenv ("INCLUDE")) { append_option_values ( args, "/I", - b, m, + x, sys_hdr_dirs.end (), [] (const dir_path& d) {return d.string ().c_str ();}); } } @@ -260,6 +354,35 @@ namespace build2 case lang::c: o1 = "/TC"; break; case lang::cxx: o1 = "/TP"; break; } + + // Note: /interface and /internalPartition are in addition to /TP. + // + switch (md.type) + { + case unit_type::non_modular: + case unit_type::module_impl: + { + break; + } + case unit_type::module_intf: + case unit_type::module_intf_part: + { + o2 = "/interface"; + break; + } + case unit_type::module_impl_part: + { + o2 = "/internalPartition"; + break; + } + case unit_type::module_header: + { + //@@ MODHDR TODO: /exportHeader + assert (false); + break; + } + } + break; } case compiler_class::gcc: @@ -278,11 +401,20 @@ namespace build2 case unit_type::module_impl: { o1 = "-x"; - switch (x_lang) + + if (x_assembler_cpp (md.src)) + o2 = "assembler-with-cpp"; + else { - case lang::c: o2 = "c"; break; - case lang::cxx: o2 = "c++"; break; + bool obj (x_objective (md.src)); + + switch (x_lang) + { + case lang::c: o2 = obj ? "objective-c" : "c"; break; + case lang::cxx: o2 = obj ? "objective-c++" : "c++"; break; + } } + break; } case unit_type::module_intf: @@ -322,9 +454,11 @@ namespace build2 default: assert (false); } + break; } } + break; } } @@ -348,7 +482,7 @@ namespace build2 } bool compile_rule:: - match (action a, target& t, const string&) const + match (action a, target& t) const { tracer trace (x, "compile_rule::match"); @@ -381,13 +515,15 @@ namespace build2 // For a header unit we check the "real header" plus the C header. // - if (ut == unit_type::module_header ? p.is_a (**x_hdr) || p.is_a<h> () : - ut == unit_type::module_intf ? p.is_a (*x_mod) : - p.is_a (x_src)) + if (ut == unit_type::module_header ? p.is_a (**x_hdrs) || p.is_a<h> () : + ut == unit_type::module_intf ? 
p.is_a (*x_mod) : + p.is_a (x_src) || + (x_asp != nullptr && p.is_a (*x_asp)) || + (x_obj != nullptr && p.is_a (*x_obj))) { // Save in the target's auxiliary storage. // - t.data (match_data (ut, p)); + t.data (a, match_data (*this, ut, p)); return true; } } @@ -398,41 +534,49 @@ namespace build2 // Append or hash library options from a pair of *.export.* variables // (first is x.* then cc.*) recursively, prerequisite libraries first. + // If common is true, then only append common options from the lib{} + // groups. // template <typename T> void compile_rule:: append_library_options (appended_libraries& ls, T& args, const scope& bs, - action a, const file& l, bool la, linfo li) const + const scope* is, // Internal scope. + action a, const file& l, bool la, + linfo li, bool common, + library_cache* lib_cache) const { struct data { appended_libraries& ls; T& args; - } d {ls, args}; + const scope* is; + } d {ls, args, is}; // See through utility libraries. // - auto imp = [] (const file& l, bool la) {return la && l.is_a<libux> ();}; + auto imp = [] (const target& l, bool la) {return la && l.is_a<libux> ();}; - auto opt = [&d, this] (const file& l, + auto opt = [&d, this] (const target& l, // Note: could be lib{} const string& t, bool com, bool exp) { // Note that in our model *.export.poptions are always "interface", // even if set on liba{}/libs{}, unlike loptions. // if (!exp) // Ignore libux. - return; + return true; // Suppress duplicates. // // Compilation is the simple case: we can add the options on the first - // occurrence of the library and ignore all subsequent occurrences. - // See GitHub issue #114 for details. + // occurrence of the library and ignore (and prune) all subsequent + // occurrences. See GitHub issue #114 for details. // if (find (d.ls.begin (), d.ls.end (), &l) != d.ls.end ()) - return; + return false; + // Note: go straight for the public variable pool. + // const variable& var ( com ? 
c_export_poptions @@ -440,26 +584,166 @@ namespace build2 ? x_export_poptions : l.ctx.var_pool[t + ".export.poptions"])); - append_options (d.args, l, var); + if (const strings* ops = cast_null<strings> (l[var])) + { + // If enabled, remap -I to -isystem or /external:I for paths that + // are outside of the internal scope provided the library is not + // whitelisted. + // + auto whitelist = [&l] (const strings& pats) + { + return find_if (pats.begin (), pats.end (), + [&l] (const string& pat) + { + return path_match (l.name, pat); + }) != pats.end (); + }; + + const scope* is (d.is); + + if (is != nullptr && c_ilibs != nullptr && whitelist (*c_ilibs)) + is = nullptr; + + if (is != nullptr && x_ilibs != nullptr && whitelist (*x_ilibs)) + is = nullptr; + + for (auto i (ops->begin ()), e (ops->end ()); i != e; ++i) + { + const string& o (*i); + + if (is != nullptr) + { + // See if this is -I<dir> or -I <dir> (or /I... for MSVC). + // + // While strictly speaking we can only attempt to recognize + // options until we hit something unknown (after that, we don't + // know what's an option and what's a value), it doesn't seem + // likely to cause issues here, where we only expect to see -I, + // -D, and -U. + // + bool msvc (cclass == compiler_class::msvc); + + if ((o[0] == '-' || (msvc && o[0] == '/')) && o[1] == 'I') + { + bool sep (o.size () == 2); // -I<dir> vs -I <dir> + + const char* v (nullptr); + size_t vn (0); + if (sep) + { + if (i + 1 == e) + ; // Append as is and let the compiler complain. + else + { + ++i; + v = i->c_str (); + vn = i->size (); + } + } + else + { + v = o.c_str () + 2; + vn = o.size () - 2; + } + + if (v != nullptr) + { + // See if we need to translate the option for this path. We + // only do this for absolute paths and try to optimize for + // the already normalized ones. 
+ // + if (path_traits::absolute (v)) + { + const char* p (nullptr); + size_t pn (0); + + dir_path nd; + if (path_traits::normalized (v, vn, true /* separators */)) + { + p = v; + pn = vn; + } + else + try + { + nd = dir_path (v, vn); + nd.normalize (); + p = nd.string ().c_str (); + pn = nd.string ().size (); + } + catch (const invalid_path&) + { + // Ignore this path. + } + + if (p != nullptr) + { + auto sub = [p, pn] (const dir_path& d) + { + return path_traits::sub ( + p, pn, + d.string ().c_str (), d.string ().size ()); + }; + + // Translate if it's neither in src nor in out of the + // internal scope. + // + if (!sub (is->src_path ()) && + (is->out_eq_src () || !sub (is->out_path ()))) + { + // Note: must use original value (path is temporary). + // + append_option (d.args, + msvc ? "/external:I" : "-isystem"); + append_option (d.args, v); + continue; + } + } + } + + // If not translated, preserve the original form. + // + append_option (d.args, o.c_str ()); + if (sep) append_option (d.args, v); + + continue; + } + } + } + + append_option (d.args, o.c_str ()); + } + } // From the process_libraries() semantics we know that the final call // is always for the common options. // if (com) d.ls.push_back (&l); + + return true; }; process_libraries (a, bs, li, sys_lib_dirs, - l, la, 0, // Hack: lflags unused. - imp, nullptr, opt); + l, la, 0, // lflags unused. + imp, nullptr, opt, + false /* self */, + common /* proc_opt_group */, + lib_cache); } void compile_rule:: append_library_options (appended_libraries& ls, strings& args, const scope& bs, - action a, const file& l, bool la, linfo li) const + action a, const file& l, bool la, + linfo li, + bool common, + bool original) const { - append_library_options<strings> (ls, args, bs, a, l, la, li); + const scope* is (!original && isystem (*this) + ? 
effective_iscope (bs) + : nullptr); + append_library_options (ls, args, bs, is, a, l, la, li, common, nullptr); } template <typename T> @@ -468,7 +752,16 @@ namespace build2 const scope& bs, action a, const target& t, linfo li) const { + auto iscope = [this, &bs, is = optional<const scope*> ()] () mutable + { + if (!is) + is = isystem (*this) ? effective_iscope (bs) : nullptr; + + return *is; + }; + appended_libraries ls; + library_cache lc; for (prerequisite_member p: group_prerequisite_members (a, t)) { @@ -488,7 +781,13 @@ namespace build2 (la = (f = pt->is_a<libux> ())) || ( (f = pt->is_a<libs> ()))) { - append_library_options (ls, args, bs, a, *f, la, li); + append_library_options (ls, + args, + bs, iscope (), + a, *f, la, + li, + false /* common */, + &lc); } } } @@ -498,35 +797,61 @@ namespace build2 // recursively, prerequisite libraries first. // void compile_rule:: - append_library_prefixes (prefix_map& m, + append_library_prefixes (appended_libraries& ls, prefix_map& pm, const scope& bs, - action a, - target& t, - linfo li) const + action a, const target& t, linfo li) const { - auto imp = [] (const file& l, bool la) {return la && l.is_a<libux> ();}; + struct data + { + appended_libraries& ls; + prefix_map& pm; + } d {ls, pm}; + + auto imp = [] (const target& l, bool la) {return la && l.is_a<libux> ();}; - auto opt = [&m, this] ( - const file& l, const string& t, bool com, bool exp) + auto opt = [&d, this] (const target& lt, + const string& t, bool com, bool exp) { if (!exp) - return; + return true; - const variable& var ( - com - ? c_export_poptions - : (t == x - ? x_export_poptions - : l.ctx.var_pool[t + ".export.poptions"])); + const file& l (lt.as<file> ()); + + // Suppress duplicates like in append_library_options(). 
+ // + if (find (d.ls.begin (), d.ls.end (), &l) != d.ls.end ()) + return false; + + // If this target does not belong to any project (e.g, an "imported as + // installed" library), then it can't possibly generate any headers + // for us. + // + if (const scope* rs = l.base_scope ().root_scope ()) + { + // Note: go straight for the public variable pool. + // + const variable& var ( + com + ? c_export_poptions + : (t == x + ? x_export_poptions + : l.ctx.var_pool[t + ".export.poptions"])); + + append_prefixes (d.pm, *rs, l, var); + } - append_prefixes (m, l, var); + if (com) + d.ls.push_back (&l); + + return true; }; // The same logic as in append_library_options(). // - const function<bool (const file&, bool)> impf (imp); - const function<void (const file&, const string&, bool, bool)> optf (opt); + const function<bool (const target&, bool)> impf (imp); + const function<bool (const target&, const string&, bool, bool)> optf (opt); + library_cache lib_cache; for (prerequisite_member p: group_prerequisite_members (a, t)) { if (include (a, t, p) != include_type::normal) // Excluded/ad hoc. @@ -544,75 +869,15 @@ namespace build2 continue; process_libraries (a, bs, li, sys_lib_dirs, - pt->as<file> (), la, 0, // Hack: lflags unused. - impf, nullptr, optf); + pt->as<file> (), la, 0, // lflags unused. + impf, nullptr, optf, + false /* self */, + false /* proc_opt_group */, + &lib_cache); } } } - // Update the target during the match phase. Return true if it has changed - // or if the passed timestamp is not timestamp_unknown and is older than - // the target. - // - // This function is used to make sure header dependencies are up to date. - // - // There would normally be a lot of headers for every source file (think - // all the system headers) and just calling execute_direct() on all of - // them can get expensive. 
At the same time, most of these headers are - // existing files that we will never be updating (again, system headers, - // for example) and the rule that will match them is the fallback - // file_rule. That rule has an optimization: it returns noop_recipe (which - // causes the target state to be automatically set to unchanged) if the - // file is known to be up to date. So we do the update "smartly". - // - static bool - update (tracer& trace, action a, const target& t, timestamp ts) - { - const path_target* pt (t.is_a<path_target> ()); - - if (pt == nullptr) - ts = timestamp_unknown; - - target_state os (t.matched_state (a)); - - if (os == target_state::unchanged) - { - if (ts == timestamp_unknown) - return false; - else - { - // We expect the timestamp to be known (i.e., existing file). - // - timestamp mt (pt->mtime ()); - assert (mt != timestamp_unknown); - return mt > ts; - } - } - else - { - // We only want to return true if our call to execute() actually - // caused an update. In particular, the target could already have been - // in target_state::changed because of a dependency extraction run for - // some other source file. - // - // @@ MT perf: so we are going to switch the phase and execute for - // any generated header. - // - phase_switch ps (t.ctx, run_phase::execute); - target_state ns (execute_direct (a, t)); - - if (ns != os && ns != target_state::unchanged) - { - l6 ([&]{trace << "updated " << t - << "; old state " << os - << "; new state " << ns;}); - return true; - } - else - return ts != timestamp_unknown ? pt->newer (ts, ns) : false; - } - } - recipe compile_rule:: apply (action a, target& xt) const { @@ -620,7 +885,7 @@ namespace build2 file& t (xt.as<file> ()); // Either obj*{} or bmi*{}. - match_data& md (t.data<match_data> ()); + match_data& md (t.data<match_data> (a)); context& ctx (t.ctx); @@ -713,7 +978,9 @@ namespace build2 // // Note: ut is still unrefined. 
// - if (ut == unit_type::module_intf && cast_true<bool> (t[b_binless])) + if ((ut == unit_type::module_intf || + ut == unit_type::module_intf_part || + ut == unit_type::module_impl_part) && cast_true<bool> (t[b_binless])) { // The module interface unit can be the same as an implementation // (e.g., foo.mxx and foo.cxx) which means obj*{} targets could @@ -746,8 +1013,6 @@ namespace build2 // wait_guard wg (ctx, ctx.count_busy (), t[a].task_count, true); - target_state src_ts1 (target_state::unknown), src_ts2 (src_ts1); - size_t src_i (~0); // Index of src target. size_t start (pts.size ()); // Index of the first to be added. for (prerequisite_member p: group_prerequisite_members (a, t)) @@ -759,8 +1024,8 @@ namespace build2 continue; // A dependency on a library is there so that we can get its - // *.export.poptions, modules, etc. This is the library metadata - // protocol. See also append_library_options(). + // *.export.poptions, modules, importable headers, etc. This is the + // library metadata protocol. See also append_library_options(). // if (pi == include_type::normal && (p.is_a<libx> () || @@ -773,8 +1038,14 @@ namespace build2 // Handle (phase two) imported libraries. We know that for such // libraries we don't need to do match() in order to get options // (if any, they would be set by search_library()). But we do need - // to match it if we may need its modules (see search_modules() - // for details). + // to match it if we may need its modules or importable headers + // (see search_modules(), make_header_sidebuild() for details). + // + // Well, that was the case until we've added support for immediate + // importation of libraries, which happens during the load phase + // and naturally leaves the library unmatched. While we could have + // returned from search_library() an indication of whether the + // library has been matched, this doesn't seem worth the trouble.
// if (p.proj ()) { @@ -783,8 +1054,10 @@ namespace build2 usr_lib_dirs, p.prerequisite); +#if 0 if (pt != nullptr && !modules) continue; +#endif } if (pt == nullptr) @@ -812,28 +1085,21 @@ namespace build2 { pt = &p.search (t); - if (a.operation () == clean_id && !pt->dir.sub (rs.out_path ())) + if (pt == dir || + (a.operation () == clean_id && !pt->dir.sub (rs.out_path ()))) continue; } - target_state ts ( - match_async (a, *pt, ctx.count_busy (), t[a].task_count)); + match_async (a, *pt, ctx.count_busy (), t[a].task_count); if (p == md.src) - { src_i = pts.size (); - src_ts1 = ts; - } pts.push_back (prerequisite_target (pt, pi)); } - size_t src_tc1 (t[a].task_count.load (memory_order_consume)); - wg.wait (); - size_t src_tc2 (t[a].task_count.load (memory_order_consume)); - // Finish matching all the targets that we have started. // for (size_t i (start), n (pts.size ()); i != n; ++i) @@ -847,8 +1113,13 @@ namespace build2 // match in link::apply() it will be safe unless someone is building // an obj?{} target directly. // + // @@ If for some reason unmatch fails, this messes up the for_install + // logic because we will update this library during match. Perhaps + // we should postpone updating them until execute if we failed to + // unmatch. See how we do this in ad hoc rule. + // pair<bool, target_state> mr ( - build2::match ( + match_complete ( a, *pt, pt->is_a<liba> () || pt->is_a<libs> () || pt->is_a<libux> () @@ -857,8 +1128,6 @@ namespace build2 if (mr.first) pt = nullptr; // Ignore in execute. - else if (i == src_i) - src_ts2 = mr.second; } // Inject additional prerequisites. We only do it when performing update @@ -884,6 +1153,8 @@ namespace build2 md.symexport = l ? cast<bool> (l) : symexport; } + // NOTE: see similar code in adhoc_buildscript_rule::apply(). + // Make sure the output directory exists. // // Is this the right thing to do? 
It does smell a bit, but then we do @@ -907,12 +1178,14 @@ namespace build2 // this can very well be happening in parallel. But that's not a // problem since fsdir{}'s update is idempotent. // - fsdir_rule::perform_update_direct (a, t); + fsdir_rule::perform_update_direct (a, *dir); } // Note: the leading '@' is reserved for the module map prefix (see // extract_modules()) and no other line must start with it. // + // NOTE: see also the predefs rule if changing anything here. + // depdb dd (tp + ".d"); // First should come the rule name/version. @@ -927,6 +1200,11 @@ namespace build2 if (dd.expect (cast<string> (rs[x_checksum])) != nullptr) l4 ([&]{trace << "compiler mismatch forcing update of " << t;}); + // Then the compiler environment checksum. + // + if (dd.expect (env_checksum) != nullptr) + l4 ([&]{trace << "environment mismatch forcing update of " << t;}); + // Then the options checksum. // // The idea is to keep them exactly as they are passed to the compiler @@ -943,9 +1221,30 @@ namespace build2 if (ut == unit_type::module_intf) // Note: still unrefined. cs.append (&md.symexport, sizeof (md.symexport)); - if (xlate_hdr != nullptr) - append_options (cs, *xlate_hdr); - + // If we track translate_include then we should probably also track + // the cc.importable flag for each header we include, which would be + // quite heavy-handed indeed. Or maybe we shouldn't bother with this + // at all: after all include translation is an optimization so why + // rebuild an otherwise up-to-date target? + // +#if 0 + if (modules) + { + // While there is also the companion importable_headers map, it's + // unlikely to change in a way that affects us without changes to + // other things that we track (e.g., compiler version, etc). 
+ // + if (const auto* v = cast_null<translatable_headers> ( + t[x_translate_include])) + { + for (const auto& p: *v) + { + cs.append (p.first); + cs.append (!p.second || *p.second); + } + } + } +#endif if (md.pp != preprocessed::all) { append_options (cs, t, x_poptions); @@ -958,19 +1257,10 @@ namespace build2 append_options (cs, t, c_coptions); append_options (cs, t, x_coptions); - - if (ot == otype::s) - { - // On Darwin, Win32 -fPIC is the default. - // - if (tclass == "linux" || tclass == "bsd") - cs.append ("-fPIC"); - } - append_options (cs, cmode); if (md.pp != preprocessed::all) - append_sys_inc_options (cs); // Extra system header dirs (last). + append_sys_hdr_options (cs); // Extra system header dirs (last). if (dd.expect (cs.string ()) != nullptr) l4 ([&]{trace << "options mismatch forcing update of " << t;}); @@ -980,32 +1270,8 @@ namespace build2 // { const path& p (src.path ()); - - // @@ TMP: we seem to have a race condition here but can't quite put - // our finger on it. - // - // NOTE: remember to get rid of src_ts*, etc., once done. - // -#if 0 assert (!p.empty ()); // Sanity check. -#else - if (p.empty ()) - { - target_state src_ts3 (src.matched_state (a, false)); - - info << "unassigned path for target " << src << - info << "is empty_path: " << (&p == &empty_path) << - info << "target state 1: " << src_ts1 << - info << "target state 2: " << src_ts2 << - info << "target state 3: " << src_ts3 << - info << "target count 1: " << src_tc1 << - info << "target count 2: " << src_tc2 << - info << "please report at " - << "https://github.com/build2/build2/issues/89"; - - assert (!p.empty ()); - } -#endif + if (dd.expect (p) != nullptr) l4 ([&]{trace << "source file mismatch forcing update of " << t;}); } @@ -1077,14 +1343,14 @@ namespace build2 // If we have no #include directives (or header unit imports), then // skip header dependency extraction. 
// - pair<auto_rmfile, bool> psrc (auto_rmfile (), false); + pair<file_cache::entry, bool> psrc (file_cache::entry (), false); if (md.pp < preprocessed::includes) { // Note: trace is used in a test. // l5 ([&]{trace << "extracting headers from " << src;}); auto& is (tu.module_info.imports); - psrc = extract_headers (a, bs, t, li, src, md, dd, u, mt, is); + extract_headers (a, bs, t, li, src, md, dd, u, mt, is, psrc); is.clear (); // No longer needed. } @@ -1139,6 +1405,10 @@ namespace build2 // if (mt != timestamp_nonexistent) { + // Appended to by to_module_info() below. + // + tu.module_info.imports.clear (); + u = false; md.touch = true; } @@ -1223,21 +1493,6 @@ namespace build2 extract_modules (a, bs, t, li, tts, src, md, move (tu.module_info), dd, u); - - // Currently in VC module interface units must be compiled from - // the original source (something to do with having to detect and - // store header boundaries in the .ifc files). - // - // @@ MODHDR MSVC: should we do the same for header units? I guess - // we will figure it out when MSVC supports header units. - // - // @@ TMP: probably outdated. Probably the same for partitions. - // - if (ctype == compiler_type::msvc) - { - if (ut == unit_type::module_intf) - psrc.second = false; - } } } @@ -1256,10 +1511,10 @@ namespace build2 // to keep re-validating the file on every subsequent dry-run as well // on the real run). // - if (u && dd.reading () && !ctx.dry_run) - dd.touch = true; + if (u && dd.reading () && !ctx.dry_run_option) + dd.touch = timestamp_unknown; - dd.close (); + dd.close (false /* mtime_check */); md.dd = move (dd.path); // If the preprocessed output is suitable for compilation, then pass @@ -1269,22 +1524,33 @@ namespace build2 { md.psrc = move (psrc.first); + // Now is also the right time to unpin the cache entry (we don't do + // it earlier because parse_unit() may need to read it). 
+ // + md.psrc.unpin (); + // Without modules keeping the (partially) preprocessed output // around doesn't buy us much: if the source/headers haven't changed // then neither will the object file. Modules make things more // interesting: now we may have to recompile an otherwise unchanged - // translation unit because a BMI it depends on has changed. In this - // case re-processing the translation unit would be a waste and - // compiling the original source would break distributed + // translation unit because a named module BMI it depends on has + // changed. In this case re-processing the translation unit would be + // a waste and compiling the original source would break distributed // compilation. // // Note also that the long term trend will (hopefully) be for // modularized projects to get rid of #include's which means the // need for producing this partially preprocessed output will - // (hopefully) gradually disappear. + // (hopefully) gradually disappear. Or not, most C headers will stay + // headers, and probably not importable. + // + // @@ TODO: no use keeping it if there are no named module imports + // (but see also file_cache::create() hint, and + // extract_headers() the cache case: there we just assume + // it exists if modules is true). // if (modules) - md.psrc.active = false; // Keep. + md.psrc.temporary = false; // Keep. } // Above we may have ignored changes to the translation unit. The @@ -1306,79 +1572,42 @@ namespace build2 switch (a) { - case perform_update_id: return [this] (action a, const target& t) - { - return perform_update (a, t); - }; - case perform_clean_id: return [this] (action a, const target& t) + case perform_update_id: return move (md); + case perform_clean_id: { - return perform_clean (a, t); - }; + return [this, srct = &md.src.type ()] (action a, const target& t) + { + return perform_clean (a, t, *srct); + }; + } default: return noop_recipe; // Configure update. } } - // Reverse-lookup target type(s) from extension. 
- // - small_vector<const target_type*, 2> compile_rule:: - map_extension (const scope& s, const string& n, const string& e) const - { - // We will just have to try all of the possible ones, in the "most - // likely to match" order. - // - auto test = [&s, &n, &e] (const target_type& tt) -> bool - { - // Call the extension derivation function. Here we know that it will - // only use the target type and name from the target key so we can - // pass bogus values for the rest. - // - target_key tk {&tt, nullptr, nullptr, &n, nullopt}; - - // This is like prerequisite search. - // - optional<string> de (tt.default_extension (tk, s, nullptr, true)); - - return de && *de == e; - }; - - small_vector<const target_type*, 2> r; - - for (const target_type* const* p (x_inc); *p != nullptr; ++p) - if (test (**p)) - r.push_back (*p); - - return r; - } - void compile_rule:: - append_prefixes (prefix_map& m, const target& t, const variable& var) const + append_prefixes (prefix_map& m, + const scope& rs, const target& t, + const variable& var) const { tracer trace (x, "compile_rule::append_prefixes"); - // If this target does not belong to any project (e.g, an "imported as - // installed" library), then it can't possibly generate any headers for - // us. - // - const scope& bs (t.base_scope ()); - const scope* rs (bs.root_scope ()); - if (rs == nullptr) - return; - - const dir_path& out_base (t.dir); - const dir_path& out_root (rs->out_path ()); - if (auto l = t[var]) { const auto& v (cast<strings> (l)); for (auto i (v.begin ()), e (v.end ()); i != e; ++i) { - // -I can either be in the "-Ifoo" or "-I foo" form. For VC it can - // also be /I. - // const string& o (*i); - if (o.size () < 2 || (o[0] != '-' && o[0] != '/') || o[1] != 'I') + // -I can either be in the "-Ifoo" or "-I foo" form. For MSVC it + // can also be /I. + // + // Note that we naturally assume that -isystem, /external:I, etc., + // are not relevant here. 
+ // + bool msvc (cclass == compiler_class::msvc); + + if (!((o[0] == '-' || (msvc && o[0] == '/')) && o[1] == 'I')) continue; dir_path d; @@ -1419,113 +1648,8 @@ namespace build2 // If we are not inside our project root, then ignore. // - if (!d.sub (out_root)) - continue; - - // If the target directory is a sub-directory of the include - // directory, then the prefix is the difference between the - // two. Otherwise, leave it empty. - // - // The idea here is to make this "canonical" setup work auto- - // magically: - // - // 1. We include all files with a prefix, e.g., <foo/bar>. - // 2. The library target is in the foo/ sub-directory, e.g., - // /tmp/foo/. - // 3. The poptions variable contains -I/tmp. - // - dir_path p (out_base.sub (d) ? out_base.leaf (d) : dir_path ()); - - // We use the target's directory as out_base but that doesn't work - // well for targets that are stashed in subdirectories. So as a - // heuristics we are going to also enter the outer directories of - // the original prefix. It is, however, possible, that another -I - // option after this one will produce one of these outer prefixes as - // its original prefix in which case we should override it. - // - // So we are going to assign the original prefix priority value 0 - // (highest) and then increment it for each outer prefix. - // - auto enter = [&trace, &m] (dir_path p, dir_path d, size_t prio) - { - auto j (m.find (p)); - - if (j != m.end ()) - { - prefix_value& v (j->second); - - // We used to reject duplicates but it seems this can be - // reasonably expected to work according to the order of the - // -I options. - // - // Seeing that we normally have more "specific" -I paths first, - // (so that we don't pick up installed headers, etc), we ignore - // it. 
- // - if (v.directory == d) - { - if (v.priority > prio) - v.priority = prio; - } - else if (v.priority <= prio) - { - if (verb >= 4) - trace << "ignoring mapping for prefix '" << p << "'\n" - << " existing mapping to " << v.directory - << " priority " << v.priority << '\n' - << " another mapping to " << d - << " priority " << prio; - } - else - { - if (verb >= 4) - trace << "overriding mapping for prefix '" << p << "'\n" - << " existing mapping to " << v.directory - << " priority " << v.priority << '\n' - << " new mapping to " << d - << " priority " << prio; - - v.directory = move (d); - v.priority = prio; - } - } - else - { - l6 ([&]{trace << "'" << p << "' -> " << d << " priority " - << prio;}); - m.emplace (move (p), prefix_value {move (d), prio}); - } - }; - -#if 1 - // Enter all outer prefixes, including prefixless. - // - // The prefixless part is fuzzy but seems to be doing the right - // thing ignoring/overriding-wise, at least in cases where one of - // the competing -I paths is a subdirectory of another. But the - // proper solution will be to keep all the prefixless entries (by - // changing prefix_map to a multimap) since for them we have an - // extra check (target must be explicitly spelled out in a - // buildfile). - // - for (size_t prio (0);; ++prio) - { - bool e (p.empty ()); - enter ((e ? move (p) : p), (e ? move (d) : d), prio); - if (e) - break; - p = p.directory (); - } -#else - size_t prio (0); - for (bool e (false); !e; ++prio) - { - dir_path n (p.directory ()); - e = n.empty (); - enter ((e ? move (p) : p), (e ? move (d) : d), prio); - p = move (n); - } -#endif + if (d.sub (rs.out_path ())) + append_prefix (trace, m, t, move (d)); } } } @@ -1533,83 +1657,23 @@ namespace build2 auto compile_rule:: build_prefix_map (const scope& bs, action a, - target& t, + const target& t, linfo li) const -> prefix_map { - prefix_map m; + prefix_map pm; // First process our own. 
// - append_prefixes (m, t, x_poptions); - append_prefixes (m, t, c_poptions); + const scope& rs (*bs.root_scope ()); + append_prefixes (pm, rs, t, x_poptions); + append_prefixes (pm, rs, t, c_poptions); // Then process the include directories from prerequisite libraries. // - append_library_prefixes (m, bs, a, t, li); - - return m; - } - - // Return the next make prerequisite starting from the specified - // position and update position to point to the start of the - // following prerequisite or l.size() if there are none left. - // - static string - next_make (const string& l, size_t& p) - { - size_t n (l.size ()); - - // Skip leading spaces. - // - for (; p != n && l[p] == ' '; p++) ; - - // Lines containing multiple prerequisites are 80 characters max. - // - string r; - r.reserve (n); - - // Scan the next prerequisite while watching out for escape sequences. - // - for (; p != n && l[p] != ' '; p++) - { - char c (l[p]); - - if (p + 1 != n) - { - if (c == '$') - { - // Got to be another (escaped) '$'. - // - if (l[p + 1] == '$') - ++p; - } - else if (c == '\\') - { - // This may or may not be an escape sequence depending on whether - // what follows is "escapable". - // - switch (c = l[++p]) - { - case '\\': break; - case ' ': break; - default: c = '\\'; --p; // Restore. - } - } - } - - r += c; - } - - // Skip trailing spaces. - // - for (; p != n && l[p] == ' '; p++) ; - - // Skip final '\'. - // - if (p == n - 1 && l[p] == '\\') - p++; + appended_libraries ls; + append_library_prefixes (ls, pm, bs, a, t, li); - return r; + return pm; } // VC /showIncludes output. The first line is the file being compiled @@ -1807,22 +1871,31 @@ namespace build2 // Any unhandled io_error is handled by the caller as a generic module // mapper io error. Returning false terminates the communication. // - struct compile_rule::module_mapper_state //@@ gcc_module_mapper_state + struct compile_rule::gcc_module_mapper_state { size_t skip; // Number of depdb entries to skip. 
size_t header_units = 0; // Number of header units imported. module_imports& imports; // Unused (potentially duplicate suppression). + // Include translation (looked up lazily). + // + optional<const build2::cc::translatable_headers*> translatable_headers; + small_vector<string, 2> batch; // Reuse buffers. + size_t batch_n = 0; - module_mapper_state (size_t s, module_imports& i) + gcc_module_mapper_state (size_t s, module_imports& i) : skip (s), imports (i) {} }; - bool compile_rule:: - gcc_module_mapper (module_mapper_state& st, + // The module mapper is called on one line of input at a time. It should + // return nullopt if another line is expected (batch), false if the mapper + // interaction should be terminated, and true if it should be continued. + // + optional<bool> compile_rule:: + gcc_module_mapper (gcc_module_mapper_state& st, action a, const scope& bs, file& t, linfo li, - ifdstream& is, + const string& l, ofdstream& os, depdb& dd, bool& update, bool& bad_error, optional<prefix_map>& pfx_map, srcout_map& so_map) const @@ -1838,35 +1911,40 @@ namespace build2 // Read in the entire batch trying hard to reuse the buffers. // - auto& batch (st.batch); - size_t batch_n (0); + small_vector<string, 2>& batch (st.batch); + size_t& batch_n (st.batch_n); - for (;;) + // Add the next line. + // { if (batch.size () == batch_n) - batch.push_back (string ()); - - string& r (batch[batch_n]); - - if (eof (getline (is, r))) - break; + batch.push_back (l); + else + batch[batch_n] = l; batch_n++; + } - if (r.back () != ';') - break; + // Check if more is expected in this batch. + // + { + string& r (batch[batch_n - 1]); - // Strip the trailing `;` word. - // - r.pop_back (); - r.pop_back (); - } + if (r.back () == ';') + { + // Strip the trailing `;` word. + // + r.pop_back (); + r.pop_back (); - if (batch_n == 0) // EOF - return false; + return nullopt; + } + } if (verb >= 3) { + // It doesn't feel like buffering this would be useful. 
+ // // Note that we show `;` in requests/responses so that the result // could be replayed. // @@ -1888,23 +1966,211 @@ namespace build2 for (size_t i (0); i != batch_n; ++i) { string& r (batch[i]); + size_t rn (r.size ()); - // @@ TODO: quoting and escaping. + // The protocol uses a peculiar quoting/escaping scheme that can be + // summarized as follows (see the libcody documentation for details): + // + // - Words are separated with spaces and/or tabs. + // + // - Words need not be quoted if they only contain characters from + // the [-+_/%.A-Za-z0-9] set. // - size_t b (0), e (0), n; // Next word. + // - Otherwise words need to be single-quoted. + // + // - Inside single-quoted words, the \n \t \' and \\ escape sequences + // are recognized. + // + // Note that we currently don't treat abutted quotes (as in a' 'b) as + // a single word (it doesn't seem plausible that we will ever receive + // something like this). + // + size_t b (0), e (0), n; bool q; // Next word. - auto next = [&r, &b, &e, &n] () -> size_t + auto next = [&r, rn, &b, &e, &n, &q] () -> size_t { - return (n = next_word (r, b, e, ' ', '\t')); + if (b != e) + b = e; + + // Skip leading whitespaces. + // + for (; b != rn && (r[b] == ' ' || r[b] == '\t'); ++b) ; + + if (b != rn) + { + q = (r[b] == '\''); + + // Find first trailing whitespace or closing quote. + // + for (e = b + 1; e != rn; ++e) + { + // Note that we deal with invalid quoting/escaping in unquote(). + // + switch (r[e]) + { + case ' ': + case '\t': + if (q) + continue; + else + break; + case '\'': + if (q) + { + ++e; // Include closing quote (hopefully). + break; + } + else + { + assert (false); // Abutted quote. + break; + } + case '\\': + if (++e != rn) // Skip next character (hopefully). + continue; + else + break; + default: + continue; + } + + break; + } + + n = e - b; + } + else + { + q = false; + e = rn; + n = 0; + } + + return n; }; + // Unquote into tmp the current word returning false if malformed.
+ // + auto unquote = [&r, &b, &n, &q, &tmp] (bool clear = true) -> bool + { + if (q && n > 1) + { + size_t e (b + n - 1); + + if (r[b] == '\'' && r[e] == '\'') + { + if (clear) + tmp.clear (); + + size_t i (b + 1); + for (; i != e; ++i) + { + char c (r[i]); + if (c == '\\') + { + if (++i == e) + { + i = 0; + break; + } + + c = r[i]; + if (c == 'n') c = '\n'; + else if (c == 't') c = '\t'; + } + tmp += c; + } + + if (i == e) + return true; + } + } + + return false; + }; + +#if 0 +#define UNQUOTE(x, y) \ + r = x; rn = r.size (); b = e = 0; \ + assert (next () && unquote () && tmp == y) + + UNQUOTE ("'foo bar'", "foo bar"); + UNQUOTE (" 'foo bar' ", "foo bar"); + UNQUOTE ("'foo\\\\bar'", "foo\\bar"); + UNQUOTE ("'\\'foo bar'", "'foo bar"); + UNQUOTE ("'foo bar\\''", "foo bar'"); + UNQUOTE ("'\\'foo\\\\bar\\''", "'foo\\bar'"); + + fail << "all good"; +#endif + + // Escape if necessary the specified string and append to r. + // + auto escape = [&r] (const string& s) + { + size_t b (0), e, n (s.size ()); + while (b != n && (e = s.find_first_of ("\\'\n\t", b)) != string::npos) + { + r.append (s, b, e - b); // Preceding chunk. + + char c (s[e]); + r += '\\'; + r += (c == '\n' ? 'n' : c == '\t' ? 't' : c); + b = e + 1; + } + + if (b != n) + r.append (s, b, e); // Final chunk. + }; + + // Quote and escape if necessary the specified string and append to r. + // + auto quote = [&r, &escape] (const string& s) + { + if (find_if (s.begin (), s.end (), + [] (char c) + { + return !((c >= 'a' && c <= 'z') || + (c >= '0' && c <= '9') || + (c >= 'A' && c <= 'Z') || + c == '-' || c == '_' || c == '/' || + c == '.' 
|| c == '+' || c == '%'); + }) == s.end ()) + { + r += s; + } + else + { + r += '\''; + escape (s); + r += '\''; + } + }; + +#if 0 +#define QUOTE(x, y) \ + r.clear (); quote (x); \ + assert (r == y) + + QUOTE ("foo/Bar-7.h", "foo/Bar-7.h"); + + QUOTE ("foo bar", "'foo bar'"); + QUOTE ("foo\\bar", "'foo\\\\bar'"); + QUOTE ("'foo bar", "'\\'foo bar'"); + QUOTE ("foo bar'", "'foo bar\\''"); + QUOTE ("'foo\\bar'", "'\\'foo\\\\bar\\''"); + + fail << "all good"; +#endif + next (); // Request name. - auto name = [&r, b, n] (const char* c) -> bool + auto name = [&r, b, n, q] (const char* c) -> bool { // We can reasonably assume a command will never be quoted. // - return (r.compare (b, n, c) == 0 && + return (!q && + r.compare (b, n, c) == 0 && (r[n] == ' ' || r[n] == '\t' || r[n] == '\0')); }; @@ -1953,7 +2219,17 @@ namespace build2 if (next ()) { - path f (r, b, n); + path f; + if (!q) + f = path (r, b, n); + else if (unquote ()) + f = path (tmp); + else + { + r = "ERROR 'malformed quoting/escaping in request'"; + continue; + } + bool exists (true); // The TU path we pass to the compiler is always absolute so any @@ -1964,8 +2240,9 @@ namespace build2 // if (exists && f.relative ()) { - tmp.assign (r, b, n); - r = "ERROR relative header path '"; r += tmp; r += '\''; + r = "ERROR 'relative header path "; + escape (f.string ()); + r += '\''; continue; } @@ -2002,16 +2279,17 @@ namespace build2 try { pair<const file*, bool> er ( - enter_header (a, bs, t, li, - move (f), false /* cache */, false /* norm */, - pfx_map, so_map)); + enter_header ( + a, bs, t, li, + move (f), false /* cache */, false /* normalized */, + pfx_map, so_map)); ht = er.first; remapped = er.second; if (remapped) { - r = "ERROR remapping of headers not supported"; + r = "ERROR 'remapping of headers not supported'"; continue; } @@ -2021,14 +2299,14 @@ namespace build2 // diagnostics won't really add anything to the compiler's. So // let's only print it at -V or higher. 
// - if (ht == nullptr) + if (ht == nullptr) // f is still valid. { assert (!exists); // Sanity check. if (verb > 2) { diag_record dr; - dr << error << "header '" << f << "' not found and no " + dr << error << "header " << f << " not found and no " << "rule to generate it"; if (verb < 4) @@ -2069,8 +2347,10 @@ namespace build2 // messy, let's keep both (it would have been nicer to print // ours after the compiler's but that isn't easy). // - r = "ERROR unable to update header '"; - r += (ht != nullptr ? ht->path () : f).string (); + // Note: if ht is NULL, f is still valid. + // + r = "ERROR 'unable to update header "; + escape ((ht != nullptr ? ht->path () : f).string ()); r += '\''; continue; } @@ -2094,21 +2374,84 @@ namespace build2 // Now handle INCLUDE and IMPORT differences. // - const string& hp (ht->path ().string ()); + const path& hp (ht->path ()); + const string& hs (hp.string ()); // Reduce include translation to the import case. // - if (!imp && xlate_hdr != nullptr) + if (!imp) { - auto i (lower_bound ( - xlate_hdr->begin (), xlate_hdr->end (), - hp, - [] (const string& x, const string& y) - { - return path_traits::compare (x, y) < 0; - })); + if (!st.translatable_headers) + st.translatable_headers = + cast_null<translatable_headers> (t[x_translate_include]); + + if (*st.translatable_headers != nullptr) + { + auto& ths (**st.translatable_headers); + + // First look for the header path in the translatable headers + // itself. + // + auto i (ths.find (hs)), ie (ths.end ()); + + // Next look it up in the importable headers and then look up + // the associated groups in the translatable headers. + // + if (i == ie) + { + slock l (importable_headers->mutex); + auto& ihs (importable_headers->header_map); + + auto j (ihs.find (hp)), je (ihs.end ()); + + if (j != je) + { + // The groups are ordered from the most to least specific. + // + for (const string& g: j->second) + if ((i = ths.find (g)) != ie) + break; + } + + // Finally look for the `all` groups. 
+ // + if (i == ie) + { + i = ths.find (header_group_all_importable); + + if (i != ie) + { + // See if this header is marked as importable. + // + if (lookup l = (*ht)[c_importable]) + { + if (!cast<bool> (l)) + i = ie; + } + else if (j != je) + { + // See if this is one of ad hoc *-importable groups + // (currently only std-importable). + // + const auto& gs (j->second); + if (find (gs.begin (), + gs.end (), + header_group_std_importable) == gs.end ()) + i = ie; + } + else + i = ie; + } + + if (i == ie) + i = ths.find (header_group_all); + } + } - imp = (i != xlate_hdr->end () && *i == hp); + // Translate if we found an entry and it's not false. + // + imp = (i != ie && (!i->second || *i->second)); + } } if (imp) @@ -2118,7 +2461,7 @@ namespace build2 // Synthesize the BMI dependency then update and add the BMI // target as a prerequisite. // - const file& bt (make_header_sidebuild (a, bs, li, *ht)); + const file& bt (make_header_sidebuild (a, bs, t, li, *ht)); if (!skip) { @@ -2142,16 +2485,27 @@ namespace build2 // original (which we may need to normalize when we read // this mapping in extract_headers()). // - tmp = "@ "; tmp.append (r, b, n); tmp += ' '; tmp += bp; + // @@ This still breaks if the header path contains spaces. + // GCC bug 110153. + // + tmp = "@ "; + if (!q) tmp.append (r, b, n); + else unquote (false /* clear */); // Can't fail. + tmp += ' '; + tmp += bp; + dd.expect (tmp); st.header_units++; } - r = "PATHNAME "; r += bp; + r = "PATHNAME "; + quote (bp); } catch (const failed&) { - r = "ERROR 'unable to update header unit "; r += hp; r += '\''; + r = "ERROR 'unable to update header unit for "; + escape (hs); + r += '\''; continue; } } @@ -2160,7 +2514,7 @@ namespace build2 if (skip) st.skip--; else - dd.expect (hp); + dd.expect (hs); // Confusingly, TRUE means include textually and FALSE means we // don't know. 
@@ -2177,7 +2531,7 @@ namespace build2 // Truncate the response batch and terminate the communication (see // also libcody issue #22). // - tmp.assign (r, b, n); + tmp.assign (r, b, n); // Request name (unquoted). r = "ERROR '"; r += w; r += ' '; r += tmp; r += '\''; batch_n = i + 1; term = true; @@ -2193,6 +2547,9 @@ namespace build2 // Write the response batch. // + // @@ It's theoretically possible that we get blocked writing the + // response while the compiler gets blocked writing the diagnostics. + // for (size_t i (0);; ) { string& r (batch[i]); @@ -2213,6 +2570,8 @@ namespace build2 os.flush (); + batch_n = 0; // Start a new batch. + return !term; } @@ -2466,9 +2825,10 @@ namespace build2 if (exists) { pair<const file*, bool> r ( - enter_header (a, bs, t, li, - move (f), false /* cache */, false /* norm */, - pfx_map, so_map)); + enter_header ( + a, bs, t, li, + move (f), false /* cache */, false /* normalized */, + pfx_map, so_map)); if (!r.second) // Shouldn't be remapped. ht = r.first; @@ -2476,7 +2836,7 @@ namespace build2 if (ht != pts.back ()) { - ht = static_cast<const file*> (pts.back ().target); + ht = &pts.back ().target->as<file> (); rs = "ERROR expected header '" + ht->path ().string () + "' to be found instead"; bad_error = true; // We expect an error from the compiler. @@ -2493,9 +2853,10 @@ namespace build2 try { pair<const file*, bool> er ( - enter_header (a, bs, t, li, - move (f), false /* cache */, false /* norm */, - pfx_map, so_map)); + enter_header ( + a, bs, t, li, + move (f), false /* cache */, false /* normalized */, + pfx_map, so_map)); ht = er.first; remapped = er.second; @@ -2513,7 +2874,7 @@ namespace build2 // diagnostics won't really add anything to the compiler's. So // let's only print it at -V or higher. // - if (ht == nullptr) + if (ht == nullptr) // f is still valid. { assert (!exists); // Sanity check. 
@@ -2560,10 +2921,12 @@ namespace build2 // messy, let's keep both (it would have been nicer to print // ours after the compiler's but that isn't easy). // + // Note: if ht is NULL, f is still valid. + // rs = !exists ? string ("INCLUDE") : ("ERROR unable to update header '" + - (ht != nullptr ? ht->path () : f).string () + "'"); + (ht != nullptr ? ht->path () : f).string () + '\''); bad_error = true; break; @@ -2611,7 +2974,7 @@ namespace build2 // Synthesize the BMI dependency then update and add the BMI // target as a prerequisite. // - const file& bt (make_header_sidebuild (a, bs, li, *ht)); + const file& bt (make_header_sidebuild (a, bs, t, li, *ht)); if (!skip) { @@ -2641,7 +3004,7 @@ namespace build2 } catch (const failed&) { - rs = "ERROR unable to update header unit '" + hp + "'"; + rs = "ERROR unable to update header unit '" + hp + '\''; bad_error = true; break; } @@ -2683,419 +3046,204 @@ namespace build2 } #endif - // Enter as a target a header file. Depending on the cache flag, the file - // is assumed to either have come from the depdb cache or from the - // compiler run. - // - // Return the header target and an indication of whether it was remapped - // or NULL if the header does not exist and cannot be generated. In the - // latter case the passed header path is guaranteed to be still valid but - // might have been adjusted (e.g., normalized, etc). + //atomic_count cache_hit {0}; + //atomic_count cache_mis {0}; + //atomic_count cache_cls {0}; + + // The fp path is only moved from on success. // // Note: this used to be a lambda inside extract_headers() so refer to the // body of that function for the overall picture. 
// pair<const file*, bool> compile_rule:: enter_header (action a, const scope& bs, file& t, linfo li, - path&& f, bool cache, bool norm, - optional<prefix_map>& pfx_map, srcout_map& so_map) const + path&& fp, bool cache, bool norm, + optional<prefix_map>& pfx_map, + const srcout_map& so_map) const { tracer trace (x, "compile_rule::enter_header"); - // Find or maybe insert the target. The directory is only moved from if - // insert is true. Note that it must be normalized. - // - auto find = [&trace, &t, this] (dir_path&& d, - path&& f, - bool insert) -> const file* + // It's reasonable to expect the same header to be included by multiple + // translation units, which means we will be re-doing this work over and + // over again. And it's not exactly cheap, taking up to 50% of an + // up-to-date check time on some projects. So we are going to cache the + // header path to target mapping. + // + // While we pass quite a bit of specific "context" (target, base scope) + // to enter_file(), here is the analysis why the result will not depend + // on this context for the non-absent header (fp is absolute): + // + // 1. Let's start with the base scope (bs). Firstly, the base scope + // passed to map_extension() is the scope of the header (i.e., it is + // the scope of fp.directory()). Other than that, the target base + // scope is only passed to build_prefix_map() which is only called + // for the absent header (linfo is also only used here). + // + // 2. Next is the target (t). It is passed to build_prefix_map() but + // that doesn't matter for the same reason as in (1). Other than + // that, it is only passed to build2::search() which in turn passes + // it to target type-specific prerequisite search callback (see + // target_type::search) if one is not NULL. The target type in + // question here is one of the headers and we know all of them use + // the standard file_search() which ignores the passed target. + // + // 3. Finally, so_map could be used for an absolute fp. 
While we could + // simply not cache the result if it was used (second half of the + // result pair is true), there doesn't seem to be any harm in caching + // the remapped path->target mapping. In fact, if to think about it, + // there is no harm in caching the generated file mapping since it + // will be immediately generated and any subsequent inclusions we + // will "see" with an absolute path, which we can resolve from the + // cache. + // + // To put it another way, all we need to do is make sure that if we were + // to not return an existing cache entry, the call to enter_file() would + // have returned exactly the same path/target. + // + // @@ Could it be that the header is re-mapped in one config but not the + // other (e.g., when we do both in src and in out builds and we pick + // the generated header in src)? If so, that would lead to a + // divergence. I.e., we would cache the no-remap case first and then + // return it even though the re-map is necessary? Why can't we just + // check for re-mapping ourselves? A: the remapping logic in + // enter_file() is not exactly trivial. + // + // But on the other hand, I think we can assume that different + // configurations will end up with different caches. In other words, + // we can assume that for the same "cc amalgamation" we use only a + // single "version" of a header. Seems reasonable. + // + // Note also that while it would have been nice to have a unified cc + // cache, the map_extension() call is passed x_incs which is module- + // specific. In other words, we may end up mapping the same header to + // two different targets depending on whether it is included from, say, + // C or C++ translation unit. We could have used a unified cache for + // headers that were mapped using the fallback target type, which would + // cover the installed headers. Maybe, one day (it's also possible that + // separate caches reduce contention). 
+ // + // Another related question is where we want to keep the cache: project, + // strong amalgamation, or weak amalgamation (like module sidebuilds). + // Some experimentation showed that weak has the best performance (which + // suggests that a unified cache will probably be a win). + // + // Note also that we don't need to clear this cache since we never clear + // the targets set. In other words, the only time targets are + // invalidated is when we destroy the build context, which also destroys + // the cache. + // + const config_module& hc (*header_cache_); + + // First check the cache. + // + config_module::header_key hk; + + bool e (fp.absolute ()); + if (e) { - // Split the file into its name part and extension. Here we can assume - // the name part is a valid filesystem name. - // - // Note that if the file has no extension, we record an empty - // extension rather than NULL (which would signify that the default - // extension should be added). - // - string e (f.extension ()); - string n (move (f).string ()); - - if (!e.empty ()) - n.resize (n.size () - e.size () - 1); // One for the dot. - - // See if this directory is part of any project out_root hierarchy and - // if so determine the target type. - // - // Note that this will miss all the headers that come from src_root - // (so they will be treated as generic C headers below). Generally, we - // don't have the ability to determine that some file belongs to - // src_root of some project. But that's not a problem for our - // purposes: it is only important for us to accurately determine - // target types for headers that could be auto-generated. - // - // While at it also try to determine if this target is from the src or - // out tree of said project. - // - dir_path out; - - // It's possible the extension-to-target type mapping is ambiguous - // (usually because both C and X-language headers use the same .h - // extension).
In this case we will first try to find one that matches - // an explicit target (similar logic to when insert is false). - // - small_vector<const target_type*, 2> tts; - - const scope& bs (t.ctx.scopes.find (d)); - if (const scope* rs = bs.root_scope ()) + if (!norm) { - tts = map_extension (bs, n, e); - - if (bs.out_path () != bs.src_path () && d.sub (bs.src_path ())) - out = out_src (d, *rs); + normalize_external (fp, "header"); + norm = true; } - // If it is outside any project, or the project doesn't have such an - // extension, assume it is a plain old C header. - // - if (tts.empty ()) - { - // If the project doesn't "know" this extension then we can't - // possibly find an explicit target of this type. - // - if (!insert) - return nullptr; + hk.file = move (fp); + hk.hash = hash<path> () (hk.file); - tts.push_back (&h::static_type); - } - - // Find or insert target. - // - // Note that in case of the target type ambiguity we first try to find - // an explicit target that resolves this ambiguity. - // - const target* r (nullptr); - - if (!insert || tts.size () > 1) + slock l (hc.header_map_mutex); + auto i (hc.header_map.find (hk)); + if (i != hc.header_map.end ()) { - // Note that we skip any target type-specific searches (like for an - // existing file) and go straight for the target object since we - // need to find the target explicitly spelled out. - // - // Also, it doesn't feel like we should be able to resolve an - // absolute path with a spelled-out extension to multiple targets. - // - for (const target_type* tt: tts) - if ((r = t.ctx.targets.find (*tt, d, out, n, e, trace)) != nullptr) - break; - - // Note: we can't do this because of the in-source builds where - // there won't be explicit targets for non-generated headers. - // - // This should be harmless, however, since in our world generated - // headers are normally spelled-out as explicit targets. And if not, - // we will still get an error, just a bit less specific. 
- // -#if 0 - if (r == nullptr && insert) - { - f = d / n; - if (!e.empty ()) - { - f += '.'; - f += e; - } - - diag_record dr (fail); - dr << "mapping of header " << f << " to target type is ambiguous"; - for (const target_type* tt: tts) - dr << info << "could be " << tt->name << "{}"; - dr << info << "spell-out its target to resolve this ambiguity"; - } -#endif + //cache_hit.fetch_add (1, memory_order_relaxed); + return make_pair (i->second, false); } - // @@ OPT: move d, out, n - // - if (r == nullptr && insert) - r = &search (t, *tts[0], d, out, n, &e, nullptr); + fp = move (hk.file); - return static_cast<const file*> (r); - }; + //cache_mis.fetch_add (1, memory_order_relaxed); + } - // If it's not absolute then it either does not (yet) exist or is a - // relative ""-include (see init_args() for details). Reduce the second - // case to absolute. - // - // Note: we now always use absolute path to the translation unit so this - // no longer applies. But let's keep it for posterity. - // -#if 0 - if (f.relative () && rels.relative ()) + struct data { - // If the relative source path has a directory component, make sure - // it matches since ""-include will always start with that (none of - // the compilers we support try to normalize this path). Failed that - // we may end up searching for a generated header in a random - // (working) directory. - // - const string& fs (f.string ()); - const string& ss (rels.string ()); - - size_t p (path::traits::rfind_separator (ss)); - - if (p == string::npos || // No directory. - (fs.size () > p + 1 && - path::traits::compare (fs.c_str (), p, ss.c_str (), p) == 0)) - { - path t (work / f); // The rels path is relative to work. - - if (exists (t)) - f = move (t); - } - } -#endif + linfo li; + optional<prefix_map>& pfx_map; + } d {li, pfx_map}; + + // If it is outside any project, or the project doesn't have such an + // extension, assume it is a plain old C header. 
+ // + auto r (enter_file ( + trace, "header", + a, bs, t, + fp, cache, norm, + [this] (const scope& bs, const string& n, const string& e) + { + return map_extension (bs, n, e, x_incs); + }, + h::static_type, + [this, &d] (action a, const scope& bs, const target& t) + -> const prefix_map& + { + if (!d.pfx_map) + d.pfx_map = build_prefix_map (bs, a, t, d.li); - const file* pt (nullptr); - bool remapped (false); + return *d.pfx_map; + }, + so_map)); - // If still relative then it does not exist. + // Cache. // - if (f.relative ()) + if (r.first != nullptr) { - // This is probably as often an error as an auto-generated file, so - // trace at level 4. - // - l4 ([&]{trace << "non-existent header '" << f << "'";}); - - f.normalize (); + hk.file = move (fp); - // The relative path might still contain '..' (e.g., ../foo.hxx; - // presumably ""-include'ed). We don't attempt to support auto- - // generated headers with such inclusion styles. + // Calculate the hash if we haven't yet and re-calculate it if the + // path has changed (header has been remapped). // - if (f.normalized ()) - { - if (!pfx_map) - pfx_map = build_prefix_map (bs, a, t, li); - - // First try the whole file. Then just the directory. - // - // @@ Has to be a separate map since the prefix can be the same as - // the file name. - // - // auto i (pfx_map->find (f)); - - // Find the most qualified prefix of which we are a sub-path. - // - if (!pfx_map->empty ()) - { - dir_path d (f.directory ()); - auto i (pfx_map->find_sup (d)); - - if (i != pfx_map->end ()) - { - // Note: value in pfx_map is not necessarily canonical. - // - dir_path pd (i->second.directory); - pd.canonicalize (); - - l4 ([&]{trace << "prefix '" << d << "' mapped to " << pd;}); + if (!e || r.second) + hk.hash = hash<path> () (hk.file); - // If this is a prefixless mapping, then only use it if we can - // resolve it to an existing target (i.e., it is explicitly - // spelled out in a buildfile). 
- // - // Note that at some point we will probably have a list of - // directories. - // - pt = find (pd / d, f.leaf (), !i->first.empty ()); - if (pt != nullptr) - { - f = pd / f; - l4 ([&]{trace << "mapped as auto-generated " << f;}); - } - else - l4 ([&]{trace << "no explicit target in " << pd;}); - } - else - l4 ([&]{trace << "no prefix map entry for '" << d << "'";}); - } - else - l4 ([&]{trace << "prefix map is empty";}); - } - } - else - { - // We used to just normalize the path but that could result in an - // invalid path (e.g., for some system/compiler headers on CentOS 7 - // with Clang 3.4) because of the symlinks (if a directory component - // is a symlink, then any following `..` are resolved relative to the - // target; see path::normalize() for background). - // - // Initially, to fix this, we realized (i.e., realpath(3)) it instead. - // But that turned out also not to be quite right since now we have - // all the symlinks resolved: conceptually it feels correct to keep - // the original header names since that's how the user chose to - // arrange things and practically this is how the compilers see/report - // them (e.g., the GCC module mapper). - // - // So now we have a pretty elaborate scheme where we try to use the - // normalized path if possible and fallback to realized. Normalized - // paths will work for situations where `..` does not cross symlink - // boundaries, which is the sane case. And for the insane case we only - // really care about out-of-project files (i.e., system/compiler - // headers). In other words, if you have the insane case inside your - // project, then you are on your own. - // - // All of this is unless the path comes from the depdb, in which case - // we've already done that (normally). This is also where we handle - // src-out remap (again, not needed if cached). 
- // - if (!cache || norm) + const file* f; { - // Interestingly, on most paltforms and with most compilers (Clang - // on Linux being a notable exception) most system/compiler headers - // are already normalized. - // - path_abnormality a (f.abnormalities ()); - if (a != path_abnormality::none) - { - // While we can reasonably expect this path to exit, things do go - // south from time to time (like compiling under wine with file - // wlantypes.h included as WlanTypes.h). - // - try - { - // If we have any parent components, then we have to verify the - // normalized path matches realized. - // - path r; - if ((a & path_abnormality::parent) == path_abnormality::parent) - { - r = f; - r.realize (); - } - - try - { - f.normalize (); - - // Note that we might still need to resolve symlinks in the - // normalized path. - // - if (!r.empty () && f != r && path (f).realize () != r) - f = move (r); - } - catch (const invalid_path&) - { - assert (!r.empty ()); // Shouldn't have failed if no `..`. - f = move (r); // Fallback to realize. - } - } - catch (const invalid_path&) - { - fail << "invalid header path '" << f.string () << "'"; - } - catch (const system_error& e) - { - fail << "invalid header path '" << f.string () << "': " << e; - } - } + ulock l (hc.header_map_mutex); + auto p (hc.header_map.emplace (move (hk), r.first)); + f = p.second ? nullptr : p.first->second; } - if (!cache) + if (f != nullptr) { - if (!so_map.empty ()) - { - // Find the most qualified prefix of which we are a sub-path. - // - auto i (so_map.find_sup (f)); - if (i != so_map.end ()) - { - // Ok, there is an out tree for this headers. Remap to a path - // from the out tree and see if there is a target for it. Note - // that the value in so_map is not necessarily canonical. - // - dir_path d (i->second); - d /= f.leaf (i->first).directory (); - d.canonicalize (); - - pt = find (move (d), f.leaf (), false); // d is not moved from. 
- - if (pt != nullptr) - { - path p (d / f.leaf ()); - l4 ([&]{trace << "remapping " << f << " to " << p;}); - f = move (p); - remapped = true; - } - } - } - } - - if (pt == nullptr) - { - l6 ([&]{trace << "entering " << f;}); - pt = find (f.directory (), f.leaf (), true); + //cache_cls.fetch_add (1, memory_order_relaxed); + assert (r.first == f); } } - return make_pair (pt, remapped); + return r; } - // Update and add to the list of prerequisite targets a header or header - // unit target. - // - // Return the indication of whether it has changed or, if the passed - // timestamp is not timestamp_unknown, is older than the target. If the - // header does not exists nor can be generated (no rule), then issue - // diagnostics and fail if the fail argument is true and return nullopt - // otherwise. - // // Note: this used to be a lambda inside extract_headers() so refer to the // body of that function for the overall picture. // optional<bool> compile_rule:: inject_header (action a, file& t, - const file& pt, timestamp mt, bool f /* fail */) const + const file& pt, timestamp mt, bool fail) const { tracer trace (x, "compile_rule::inject_header"); - // Even if failing we still use try_match() in order to issue consistent - // (with extract_headers() below) diagnostics (rather than the generic - // "not rule to update ..."). - // - if (!try_match (a, pt).first) - { - if (!f) - return nullopt; - - diag_record dr; - dr << fail << "header " << pt << " not found and no rule to " - << "generate it"; - - if (verb < 4) - dr << info << "re-run with --verbose=4 for more information"; - } - - bool r (update (trace, a, pt, mt)); - - // Add to our prerequisite target list. - // - t.prerequisite_targets[a].push_back (&pt); - - return r; + return inject_file (trace, "header", a, t, pt, mt, fail); } - // Extract and inject header dependencies. Return the preprocessed source - // file as well as an indication if it is usable for compilation (see - // below for details). 
+ // Extract and inject header dependencies. Return (in result) the + // preprocessed source file as well as an indication if it is usable for + // compilation (see below for details). Note that result is expected to + // be initialized to {entry (), false}. Not using return type due to + // GCC bug #107555. // // This is also the place where we handle header units which are a lot // more like auto-generated headers than modules. In particular, if a // header unit BMI is out-of-date, then we have to re-preprocess this // translation unit. // - pair<auto_rmfile, bool> compile_rule:: + void compile_rule:: extract_headers (action a, const scope& bs, file& t, @@ -3105,30 +3253,30 @@ namespace build2 depdb& dd, bool& update, timestamp mt, - module_imports& imports) const + module_imports& imports, + pair<file_cache::entry, bool>& result) const { tracer trace (x, "compile_rule::extract_headers"); + context& ctx (t.ctx); + otype ot (li.type); bool reprocess (cast_false<bool> (t[c_reprocess])); - auto_rmfile psrc; + file_cache::entry psrc; bool puse (true); - // If things go wrong (and they often do in this area), give the user a - // bit extra context. + // Preprocessed file extension. // - auto df = make_diag_frame ( - [&src](const diag_record& dr) - { - if (verb != 0) - dr << info << "while extracting header dependencies from " << src; - }); + const char* pext (x_assembler_cpp (src) ? ".Si" : + x_objective (src) ? x_obj_pext : + x_pext); // Preprocessor mode that preserves as much information as possible while // still performing inclusions. Also serves as a flag indicating whether - // this compiler uses the separate preprocess and compile setup. + // this (non-MSVC) compiler uses the separate preprocess and compile + // setup. // const char* pp (nullptr); @@ -3139,7 +3287,16 @@ namespace build2 // -fdirectives-only is available since GCC 4.3.0.
// if (cmaj > 4 || (cmaj == 4 && cmin >= 3)) - pp = "-fdirectives-only"; + { + // Note that for assembler-with-cpp GCC currently forces full + // preprocessing in (what appears to be) an attempt to paper over + // a deeper issue (see GCC bug 109534). If/when that bug gets + // fixed, we can enable this on our side. Note that Clang's + // -frewrite-includes also has issues (see below). + // + if (!x_assembler_cpp (src)) + pp = "-fdirectives-only"; + } break; } @@ -3148,7 +3305,16 @@ namespace build2 // -frewrite-includes is available since Clang 3.2.0. // if (cmaj > 3 || (cmaj == 3 && cmin >= 2)) - pp = "-frewrite-includes"; + { + // While Clang's -frewrite-includes appears to work, there are + // some issues with correctly tracking location information + // (manifests itself as wrong line numbers in debug info, for + // example). The result also appears to reference the .Si file + // instead of the original source file for some reason. + // + if (!x_assembler_cpp (src)) + pp = "-frewrite-includes"; + } break; } @@ -3229,7 +3395,7 @@ namespace build2 // // GCC's -fdirectives-only, on the other hand, processes all the // directives so they are gone from the preprocessed source. Here is - // what we are going to do to work around this: we will detect if any + // what we are going to do to work around this: we will sense if any // diagnostics has been written to stderr on the -E run. If that's the // case (but the compiler indicated success) then we assume they are // warnings and disable the use of the preprocessed output for @@ -3258,6 +3424,8 @@ namespace build2 // // So seeing that it is hard to trigger a legitimate VC preprocessor // warning, for now, we will just treat them as errors by adding /WX. + // BTW, another example of a plausible preprocessor warnings are C4819 + // and C4828 (character unrepresentable in source charset).
// // Finally, if we are using the module mapper, then all this mess falls // away: we only run the compiler once, we let the diagnostics through, @@ -3265,7 +3433,9 @@ namespace build2 // not found, and there is no problem with outdated generated headers // since we update/remap them before the compiler has a chance to read // them. Overall, this "dependency mapper" approach is how it should - // have been done from the beginning. + // have been done from the beginning. Note: that's the ideal world, + // the reality is that the required mapper extensions are not (yet) + // in libcody/GCC. // Note: diagnostics sensing is currently only supported if dependency // info is written to a file (see above). @@ -3275,15 +3445,15 @@ namespace build2 // And here is another problem: if we have an already generated header // in src and the one in out does not yet exist, then the compiler will // pick the one in src and we won't even notice. Note that this is not - // only an issue with mixing in- and out-of-tree builds (which does feel + // only an issue with mixing in and out of source builds (which does feel // wrong but is oh so convenient): this is also a problem with // pre-generated headers, a technique we use to make installing the // generator by end-users optional by shipping pre-generated headers. // // This is a nasty problem that doesn't seem to have a perfect solution - // (except, perhaps, C++ modules). So what we are going to do is try to - // rectify the situation by detecting and automatically remapping such - // mis-inclusions. It works as follows. + // (except, perhaps, C++ modules and/or module mapper). So what we are + // going to do is try to rectify the situation by detecting and + // automatically remapping such mis-inclusions. It works as follows. // // First we will build a map of src/out pairs that were specified with // -I. 
Here, for performance and simplicity, we will assume that they @@ -3296,10 +3466,7 @@ namespace build2 // case, then we calculate a corresponding header in the out tree and, // (this is the most important part), check if there is a target for // this header in the out tree. This should be fairly accurate and not - // require anything explicit from the user except perhaps for a case - // where the header is generated out of nothing (so there is no need to - // explicitly mention its target in the buildfile). But this probably - // won't be very common. + // require anything explicit from the user. // // One tricky area in this setup are target groups: if the generated // sources are mentioned in the buildfile as a group, then there might @@ -3309,10 +3476,7 @@ namespace build2 // generated depending on the options (e.g., inline files might be // suppressed), headers are usually non-optional. // - // Note that we use path_map instead of dir_path_map to allow searching - // using path (file path). - // - srcout_map so_map; // path_map<dir_path> + srcout_map so_map; // Dynamic module mapper. // @@ -3320,18 +3484,20 @@ namespace build2 // The gen argument to init_args() is in/out. The caller signals whether // to force the generated header support and on return it signals - // whether this support is enabled. The first call to init_args is - // expected to have gen false. + // whether this support is enabled. If gen is false, then stderr is + // expected to be either discarded or merged with stdout. // // Return NULL if the dependency information goes to stdout and a // pointer to the temporary file path otherwise. // - auto init_args = [a, &t, ot, li, reprocess, + auto init_args = [a, &t, ot, li, reprocess, pext, &src, &md, &psrc, &sense_diag, &mod_mapper, &bs, pp, &env, &args, &args_gen, &args_i, &out, &drm, &so_map, this] (bool& gen) -> const path* { + context& ctx (t.ctx); + const path* r (nullptr); if (args.empty ()) // First call.
@@ -3390,40 +3556,52 @@ namespace build2 // Populate the src-out with the -I$out_base -I$src_base pairs. // { + srcout_builder builder (ctx, so_map); + // Try to be fast and efficient by reusing buffers as much as // possible. // string ds; - // Previous -I innermost scope if out_base plus the difference - // between the scope path and the -I path (normally empty). - // - const scope* s (nullptr); - dir_path p; - for (auto i (args.begin ()), e (args.end ()); i != e; ++i) { + const char* o (*i); + // -I can either be in the "-Ifoo" or "-I foo" form. For VC it // can also be /I. // - const char* o (*i); - size_t n (strlen (o)); - - if (n < 2 || (o[0] != '-' && o[0] != '/') || o[1] != 'I') + // Note also that append_library_options() may have translated + // -I to -isystem or /external:I so we have to recognize those + // as well. + // { - s = nullptr; - continue; - } + bool msvc (cclass == compiler_class::msvc); - if (n == 2) - { - if (++i == e) - break; // Let the compiler complain. + size_t p (0); + if (o[0] == '-' || (msvc && o[0] == '/')) + { + p = (o[1] == 'I' ? 2 : + !msvc && strncmp (o + 1, "isystem", 7) == 0 ? 8 : + msvc && strncmp (o + 1, "external:I", 10) == 0 ? 11 : 0); + } + + if (p == 0) + { + builder.skip (); + continue; + } + + size_t n (strlen (o)); + if (n == p) + { + if (++i == e) + break; // Let the compiler complain. - ds = *i; + ds = *i; + } + else + ds.assign (o + p, n - p); } - else - ds.assign (o + 2, n - 2); if (!ds.empty ()) { @@ -3442,67 +3620,14 @@ namespace build2 // if (!d.empty ()) { - // Ignore any paths containing '.', '..' components. Allow - // any directory separators thought (think -I$src_root/foo - // on Windows). - // - if (d.absolute () && d.normalized (false)) - { - // If we have a candidate out_base, see if this is its - // src_base. - // - if (s != nullptr) - { - const dir_path& bp (s->src_path ()); - - if (d.sub (bp)) - { - if (p.empty () || d.leaf (bp) == p) - { - // We've got a pair. 
- // - so_map.emplace (move (d), s->out_path () / p); - s = nullptr; // Taken. - continue; - } - } - - // Not a pair. Fall through to consider as out_base. - // - s = nullptr; - } - - // See if this path is inside a project with an out-of- - // tree build and is in the out directory tree. - // - const scope& bs (t.ctx.scopes.find (d)); - if (bs.root_scope () != nullptr) - { - const dir_path& bp (bs.out_path ()); - if (bp != bs.src_path ()) - { - bool e; - if ((e = (d == bp)) || d.sub (bp)) - { - s = &bs; - if (e) - p.clear (); - else - p = d.leaf (bp); - } - } - } - } - else - s = nullptr; - - ds = move (d).string (); // Move the buffer out. + if (!builder.next (move (d))) + ds = move (d).string (); // Move the buffer back out. } else - s = nullptr; + builder.skip (); } else - s = nullptr; + builder.skip (); } } @@ -3511,16 +3636,6 @@ namespace build2 // Some compile options (e.g., -std, -m) affect the preprocessor. // - // Currently Clang supports importing "header modules" even when in - // the TS mode. And "header modules" support macros which means - // imports have to be resolved during preprocessing. Which poses a - // bit of a chicken and egg problem for us. For now, the workaround - // is to remove the -fmodules-ts option when preprocessing. Hopefully - // there will be a "pure modules" mode at some point. - // - // @@ MODHDR Clang: should be solved with the dynamic module mapper - // if/when Clang supports it? - // // Don't treat warnings as errors. // @@ -3547,15 +3662,47 @@ namespace build2 args.push_back ("/nologo"); append_options (args, cmode); - append_sys_inc_options (args); // Extra system header dirs (last). + append_sys_hdr_options (args); // Extra system header dirs (last). + + // Note that for MSVC stderr is merged with stdout and is then + // parsed, so no append_diag_color_options() call. - // See perform_update() for details on overriding the default - // exceptions and runtime. + // See perform_update() for details on the choice of options. 
// - if (x_lang == lang::cxx && !find_option_prefix ("/EH", args)) + // NOTE: see also the predefs rule if adding anything here. + // + { + bool sc (find_option_prefixes ( + {"/source-charset:", "-source-charset:"}, args)); + bool ec (find_option_prefixes ( + {"/execution-charset:", "-execution-charset:"}, args)); + + if (!sc && !ec) + args.push_back ("/utf-8"); + else + { + if (!sc) + args.push_back ("/source-charset:UTF-8"); + + if (!ec) + args.push_back ("/execution-charset:UTF-8"); + } + } + + if (cvariant != "clang" && isystem (*this)) + { + if (find_option_prefixes ({"/external:I", "-external:I"}, args) && + !find_option_prefixes ({"/external:W", "-external:W"}, args)) + args.push_back ("/external:W0"); + } + + if (x_lang == lang::cxx && + !find_option_prefixes ({"/EH", "-EH"}, args)) args.push_back ("/EHsc"); - if (!find_option_prefixes ({"/MD", "/MT"}, args)) + // NOTE: see similar code in search_modules(). + // + if (!find_option_prefixes ({"/MD", "/MT", "-MD", "-MT"}, args)) args.push_back ("/MD"); args.push_back ("/P"); // Preprocess to file. @@ -3566,16 +3713,16 @@ namespace build2 msvc_sanitize_cl (args); - psrc = auto_rmfile (t.path () + x_pext); + psrc = ctx.fcache->create (t.path () + pext, !modules); if (fc) { args.push_back ("/Fi:"); - args.push_back (psrc.path.string ().c_str ()); + args.push_back (psrc.path ().string ().c_str ()); } else { - out = "/Fi" + psrc.path.string (); + out = "/Fi" + psrc.path ().string (); args.push_back (out.c_str ()); } @@ -3585,8 +3732,21 @@ namespace build2 } case compiler_class::gcc: { + append_options (args, cmode); + append_sys_hdr_options (args); // Extra system header dirs (last). + + // If not gen, then stderr is discarded. + // + if (gen) + append_diag_color_options (args); + // See perform_update() for details on the choice of options. // + // NOTE: see also the predefs rule if adding anything here. 
+ // + if (!find_option_prefix ("-finput-charset=", args)) + args.push_back ("-finput-charset=UTF-8"); + if (ot == otype::s) { if (tclass == "linux" || tclass == "bsd") @@ -3595,8 +3755,7 @@ namespace build2 if (ctype == compiler_type::clang && tsys == "win32-msvc") { - initializer_list<const char*> os {"-nostdlib", "-nostartfiles"}; - if (!find_options (os, cmode) && !find_options (os, args)) + if (!find_options ({"-nostdlib", "-nostartfiles"}, args)) { args.push_back ("-D_MT"); args.push_back ("-D_DLL"); @@ -3615,10 +3774,6 @@ namespace build2 } } - append_options (args, cmode, - cmode.size () - (modules && clang ? 1 : 0)); - append_sys_inc_options (args); // Extra system header dirs (last). - // Setup the dynamic module mapper if needed. // // Note that it's plausible in the future we will use it even if @@ -3710,9 +3865,9 @@ namespace build2 // Preprocessor output. // - psrc = auto_rmfile (t.path () + x_pext); + psrc = ctx.fcache->create (t.path () + pext, !modules); args.push_back ("-o"); - args.push_back (psrc.path.string ().c_str ()); + args.push_back (psrc.path ().string ().c_str ()); } else { @@ -3836,15 +3991,12 @@ namespace build2 // to be inconvenient: some users like to re-run a failed build with // -s not to get "swamped" with errors. 
// - bool df (!ctx.match_only && !ctx.dry_run_option); - - const file* ht (enter_header (a, bs, t, li, - move (hp), cache, false /* norm */, - pfx_map, so_map).first); - if (ht == nullptr) + auto fail = [&ctx] (const auto& h) -> optional<bool> { + bool df (!ctx.match_only && !ctx.dry_run_option); + diag_record dr; - dr << error << "header '" << hp << "' not found and no rule to " + dr << error << "header " << h << " not found and no rule to " << "generate it"; if (df) @@ -3853,41 +4005,44 @@ namespace build2 if (verb < 4) dr << info << "re-run with --verbose=4 for more information"; - if (df) return nullopt; else dr << endf; - } + if (df) + return nullopt; + else + dr << endf; + }; - // If we are reading the cache, then it is possible the file has since - // been removed (think of a header in /usr/local/include that has been - // uninstalled and now we need to use one from /usr/include). This - // will lead to the match failure which we translate to a restart. - // - if (optional<bool> u = inject_header (a, t, *ht, mt, false /* fail */)) + if (const file* ht = enter_header ( + a, bs, t, li, + move (hp), cache, cache /* normalized */, + pfx_map, so_map).first) { - // Verify/add it to the dependency database. + // If we are reading the cache, then it is possible the file has + // since been removed (think of a header in /usr/local/include that + // has been uninstalled and now we need to use one from + // /usr/include). This will lead to the match failure which we + // translate to a restart. And, yes, this case will trip up + // inject_header(), not enter_header(). 
// - if (!cache) - dd.expect (ht->path ()); - - skip_count++; - return *u; - } - else if (!cache) - { - diag_record dr; - dr << error << "header " << *ht << " not found and no rule to " - << "generate it"; - - if (df) - dr << info << "failure deferred to compiler diagnostics"; - - if (verb < 4) - dr << info << "re-run with --verbose=4 for more information"; + if (optional<bool> u = inject_header (a, t, *ht, mt, false /*fail*/)) + { + // Verify/add it to the dependency database. + // + if (!cache) + dd.expect (ht->path ()); - if (df) return nullopt; else dr << endf; + skip_count++; + return *u; + } + else if (cache) + { + dd.write (); // Invalidate this line. + return true; + } + else + return fail (*ht); } - - dd.write (); // Invalidate this line. - return true; + else + return fail (hp); // hp is still valid. }; // As above but for a header unit. Note that currently it is only used @@ -3904,13 +4059,13 @@ namespace build2 const file* ht ( enter_header (a, bs, t, li, - move (hp), true /* cache */, true /* norm */, + move (hp), true /* cache */, false /* normalized */, pfx_map, so_map).first); - if (ht == nullptr) + if (ht == nullptr) // hp is still valid. { diag_record dr; - dr << error << "header '" << hp << "' not found and no rule to " + dr << error << "header " << hp << " not found and no rule to " << "generate it"; if (df) @@ -3927,7 +4082,7 @@ namespace build2 // if (inject_header (a, t, *ht, mt, false /* fail */)) { - const file& bt (make_header_sidebuild (a, bs, li, *ht)); + const file& bt (make_header_sidebuild (a, bs, t, li, *ht)); // It doesn't look like we need the cache semantics here since given // the header, we should be able to build its BMI. In other words, a @@ -3956,6 +4111,16 @@ namespace build2 const path* drmp (nullptr); // Points to drm.path () if active. + // If things go wrong (and they often do in this area), give the user a + // bit extra context. 
+ // + auto df = make_diag_frame ( + [&src](const diag_record& dr) + { + if (verb != 0) + dr << info << "while extracting header dependencies from " << src; + }); + // If nothing so far has invalidated the dependency database, then try // the cached data before running the compiler. // @@ -3990,9 +4155,13 @@ namespace build2 // If modules are enabled, then we keep the preprocessed output // around (see apply() for details). // - return modules - ? make_pair (auto_rmfile (t.path () + x_pext, false), true) - : make_pair (auto_rmfile (), false); + if (modules) + { + result.first = ctx.fcache->create_existing (t.path () + pext); + result.second = true; + } + + return; } // This can be a header or a header unit (mapping). @@ -4045,7 +4214,7 @@ namespace build2 // Bail out early if we have deferred a failure. // - return make_pair (auto_rmfile (), false); + return; } } } @@ -4059,11 +4228,24 @@ namespace build2 if (args.empty () || gen != args_gen) drmp = init_args (gen); + // If we are producing the preprocessed output, get its write + // handle. + // + file_cache::write psrcw (psrc + ? psrc.init_new () + : file_cache::write ()); + if (verb >= 3) print_process (args.data ()); // Disable pipe mode. process pr; + // We use the fdstream_mode::skip mode on stdout (cannot be used + // on both) and so dbuf must be destroyed (closed) first. + // + ifdstream is (ifdstream::badbit); + diag_buffer dbuf (ctx); + try { // Assume the preprocessed output (if produced) is usable @@ -4084,217 +4266,229 @@ namespace build2 // bool good_error (false), bad_error (false); - // If we have no generated header support, then suppress all - // diagnostics (if things go badly we will restart with this - // support). - // - if (drmp == nullptr) // Dependency info goes to stdout. + if (mod_mapper) // Dependency info is implied by mapper requests. { - assert (!sense_diag); // Note: could support with fdselect(). + assert (gen && !sense_diag); // Not used in this mode. 
- // For VC with /P the dependency info and diagnostics all go - // to stderr so redirect it to stdout. + // Note that here we use the skip mode on the diagnostics + // stream which means we have to use own instance of stdout + // stream for the correct destruction order (see below). // - pr = process ( - cpath, - args.data (), - 0, - -1, - cclass == compiler_class::msvc ? 1 : gen ? 2 : -2, - nullptr, // CWD - env.empty () ? nullptr : env.data ()); - } - else // Dependency info goes to a temporary file. - { pr = process (cpath, - args.data (), - mod_mapper ? -1 : 0, - mod_mapper ? -1 : 2, // Send stdout to stderr. - gen ? 2 : sense_diag ? -1 : -2, + args, + -1, + -1, + diag_buffer::pipe (ctx), nullptr, // CWD env.empty () ? nullptr : env.data ()); - // Monitor for module mapper requests and/or diagnostics. If - // diagnostics is detected, mark the preprocessed output as - // unusable for compilation. - // - if (mod_mapper || sense_diag) + dbuf.open (args[0], + move (pr.in_efd), + fdstream_mode::non_blocking | + fdstream_mode::skip); + try { - module_mapper_state mm_state (skip_count, imports); + gcc_module_mapper_state mm_state (skip_count, imports); + + // Note that while we read both streams until eof in normal + // circumstances, we cannot use fdstream_mode::skip for the + // exception case on both of them: we may end up being + // blocked trying to read one stream while the process may + // be blocked writing to the other. So in case of an + // exception we only skip the diagnostics and close the + // mapper stream hard. The latter (together with closing of + // the stdin stream) should happen first so the order of + // the following variable is important. + // + // Note also that we open the stdin stream in the blocking + // mode. + // + ifdstream is (move (pr.in_ofd), + fdstream_mode::non_blocking, + ifdstream::badbit); // stdout + ofdstream os (move (pr.out_fd)); // stdin (badbit|failbit) + + // Read until we reach EOF on all streams. 
+ // + // Note that if dbuf is not opened, then we automatically + // get an inactive nullfd entry. + // + fdselect_set fds {is.fd (), dbuf.is.fd ()}; + fdselect_state& ist (fds[0]); + fdselect_state& dst (fds[1]); - const char* w (nullptr); - try + bool more (false); + for (string l; ist.fd != nullfd || dst.fd != nullfd; ) { - // For now we don't need to do both so let's use a simpler - // blocking implementation. Note that the module mapper - // also needs to be adjusted when switching to the - // non-blocking version. + // @@ Currently we will accept a (potentially truncated) + // line that ends with EOF rather than newline. // -#if 1 - assert (mod_mapper != sense_diag); - - if (mod_mapper) + if (ist.fd != nullfd && getline_non_blocking (is, l)) { - w = "module mapper request"; - - // Note: the order is important (see the non-blocking - // verison for details). - // - ifdstream is (move (pr.in_ofd), - fdstream_mode::skip, - ifdstream::badbit); - ofdstream os (move (pr.out_fd)); - - do + if (eof (is)) { - if (!gcc_module_mapper (mm_state, - a, bs, t, li, - is, os, - dd, update, bad_error, - pfx_map, so_map)) - break; + os.close (); + is.close (); - } while (!is.eof ()); - - os.close (); - is.close (); - } - - if (sense_diag) - { - w = "diagnostics"; - ifdstream is (move (pr.in_efd), fdstream_mode::skip); - puse = puse && (is.peek () == ifdstream::traits_type::eof ()); - is.close (); - } -#else - fdselect_set fds; - auto add = [&fds] (const auto_fd& afd) -> fdselect_state* - { - int fd (afd.get ()); - fdmode (fd, fdstream_mode::non_blocking); - fds.push_back (fd); - return &fds.back (); - }; - - // Note that while we read both streams until eof in - // normal circumstances, we cannot use fdstream_mode::skip - // for the exception case on both of them: we may end up - // being blocked trying to read one stream while the - // process may be blocked writing to the other. 
So in case - // of an exception we only skip the diagnostics and close - // the mapper stream hard. The latter should happen first - // so the order of the following variable is important. - // - ifdstream es; - ofdstream os; - ifdstream is; - - fdselect_state* ds (nullptr); - if (sense_diag) - { - w = "diagnostics"; - ds = add (pr.in_efd); - es.open (move (pr.in_efd), fdstream_mode::skip); - } - - fdselect_state* ms (nullptr); - if (mod_mapper) - { - w = "module mapper request"; - ms = add (pr.in_ofd); - is.open (move (pr.in_ofd)); - os.open (move (pr.out_fd)); // Note: blocking. - } - - // Set each state pointer to NULL when the respective - // stream reaches eof. - // - while (ds != nullptr || ms != nullptr) - { - w = "output"; - ifdselect (fds); + if (more) + throw_generic_ios_failure (EIO, "unexpected EOF"); - // First read out the diagnostics in case the mapper - // interaction produces more. To make sure we don't get - // blocked by full stderr, the mapper should only handle - // one request at a time. 
- // - if (ds != nullptr && ds->ready) + ist.fd = nullfd; + } + else { - w = "diagnostics"; - - for (char buf[4096];;) - { - streamsize c (sizeof (buf)); - streamsize n (es.readsome (buf, c)); + optional<bool> r ( + gcc_module_mapper (mm_state, + a, bs, t, li, + l, os, + dd, update, bad_error, + pfx_map, so_map)); - if (puse && n > 0) - puse = false; + more = !r.has_value (); - if (n < c) - break; - } - - if (es.eof ()) - { - es.close (); - ds->fd = nullfd; - ds = nullptr; - } - } - - if (ms != nullptr && ms->ready) - { - w = "module mapper request"; - - gcc_module_mapper (mm_state, - a, bs, t, li, - is, os, - dd, update, bad_error, - pfx_map, so_map); - if (is.eof ()) + if (more || *r) + l.clear (); + else { os.close (); is.close (); - ms->fd = nullfd; - ms = nullptr; + ist.fd = nullfd; } } + + continue; } -#endif - } - catch (const io_error& e) - { - if (pr.wait ()) - fail << "io error handling " << x_lang << " compiler " - << w << ": " << e; - // Fall through. + ifdselect (fds); + + if (dst.ready) + { + if (!dbuf.read ()) + dst.fd = nullfd; + } } - if (mod_mapper) - md.header_units += mm_state.header_units; + md.header_units += mm_state.header_units; + } + catch (const io_error& e) + { + // Note that diag_buffer handles its own io errors so this + // is about mapper stdin/stdout. + // + if (pr.wait ()) + fail << "io error handling " << x_lang << " compiler " + << "module mapper request: " << e; + + // Fall through. } // The idea is to reduce this to the stdout case. // - pr.wait (); - - // With -MG we want to read dependency info even if there is - // an error (in case an outdated header file caused it). But - // with the GCC module mapper an error is non-negotiable, so - // to speak, and so we want to skip all of that. In fact, we - // now write directly to depdb without generating and then + // We now write directly to depdb without generating and then // parsing an intermediate dependency makefile.
// - pr.in_ofd = (ctype == compiler_type::gcc && mod_mapper) - ? auto_fd (nullfd) - : fdopen (*drmp, fdopen_mode::in); + pr.wait (); + pr.in_ofd = nullfd; + } + else + { + // If we have no generated header support, then suppress all + // diagnostics (if things go badly we will restart with this + // support). + // + if (drmp == nullptr) // Dependency info goes to stdout. + { + assert (!sense_diag); // Note: could support if necessary. + + // For VC with /P the dependency info and diagnostics all go + // to stderr so redirect it to stdout. + // + int err ( + cclass == compiler_class::msvc ? 1 : // stdout + !gen ? -2 : // /dev/null + diag_buffer::pipe (ctx, sense_diag /* force */)); + + pr = process ( + cpath, + args, + 0, + -1, + err, + nullptr, // CWD + env.empty () ? nullptr : env.data ()); + + if (cclass != compiler_class::msvc && gen) + { + dbuf.open (args[0], + move (pr.in_efd), + fdstream_mode::non_blocking); // Skip on stdout. + } + } + else // Dependency info goes to temporary file. + { + // Since we only need to read from one stream (dbuf) let's + // use the simpler blocking setup. + // + int err ( + !gen && !sense_diag ? -2 : // /dev/null + diag_buffer::pipe (ctx, sense_diag /* force */)); + + pr = process (cpath, + args, + 0, + 2, // Send stdout to stderr. + err, + nullptr, // CWD + env.empty () ? nullptr : env.data ()); + + if (gen || sense_diag) + { + dbuf.open (args[0], move (pr.in_efd)); + dbuf.read (sense_diag /* force */); + } + + if (sense_diag) + { + if (!dbuf.buf.empty ()) + { + puse = false; + dbuf.buf.clear (); // Discard. + } + } + + // The idea is to reduce this to the stdout case. + // + // Note that with -MG we want to read dependency info even + // if there is an error (in case an outdated header file + // caused it). + // + pr.wait (); + pr.in_ofd = fdopen (*drmp, fdopen_mode::in); + } } + // Read and process dependency information, if any. 
+ // if (pr.in_ofd != nullfd) { + // We have two cases here: reading from stdout and potentially + // stderr (dbuf) or reading from file (see the process startup + // code above for details). If we have to read from two + // streams, then we have to use the non-blocking setup. But we + // cannot use the non-blocking setup uniformly because on + // Windows it's only supported for pipes. So things are going + // to get a bit hairy. + // + // And there is another twist to this: for MSVC we redirect + // stderr to stdout since the header dependency information is + // part of the diagnostics. If, however, there is some real + // diagnostics, we need to pass it through, potentially with + // buffering. The way we achieve this is by later opening dbuf + // in the EOF state and using it to buffer or stream the + // diagnostics. + // + bool nb (dbuf.is.is_open ()); + // We may not read all the output (e.g., due to a restart). // Before we used to just close the file descriptor to signal // to the other end that we are not interested in the rest. @@ -4302,16 +4496,70 @@ namespace build2 // impolite and complains, loudly (broken pipe). So now we are // going to skip until the end. // - ifdstream is (move (pr.in_ofd), - fdstream_mode::text | fdstream_mode::skip, - ifdstream::badbit); + // Note that this means we are not using skip on dbuf (see + // above for the destruction order details). + // + { + fdstream_mode m (fdstream_mode::text | + fdstream_mode::skip); + + if (nb) + m |= fdstream_mode::non_blocking; + + is.open (move (pr.in_ofd), m); + } + + fdselect_set fds; + if (nb) + fds = {is.fd (), dbuf.is.fd ()}; size_t skip (skip_count); - string l; // Reuse. + string l, l2; // Reuse. for (bool first (true), second (false); !restart; ) { - if (eof (getline (is, l))) - break; + if (nb) + { + fdselect_state& ist (fds[0]); + fdselect_state& dst (fds[1]); + + // We read until we reach EOF on both streams.
+ // + if (ist.fd == nullfd && dst.fd == nullfd) + break; + + if (ist.fd != nullfd && getline_non_blocking (is, l)) + { + if (eof (is)) + { + ist.fd = nullfd; + continue; + } + + // Fall through to parse (and clear) the line. + } + else + { + ifdselect (fds); + + if (dst.ready) + { + if (!dbuf.read ()) + dst.fd = nullfd; + } + + continue; + } + } + else + { + if (eof (getline (is, l))) + { + if (bad_error && !l2.empty ()) // MSVC only (see below). + dbuf.write (l2, true /* newline */); + + break; + } + } l6 ([&]{trace << "header dependency line '" << l << "'";}); @@ -4322,20 +4570,31 @@ namespace build2 case compiler_class::msvc: { // The first line should be the file we are compiling, - // unless this is clang-cl. If it is not, then something - // went wrong even before we could compile anything - // (e.g., file does not exist). In this case the first - // line (and everything after it) is presumably - // diagnostics. + // unless this is clang-cl. + // + // If it is not, then we have several possibilities: // - // It can, however, be a command line warning, for - // example: + // First, it can be a command line warning, for example: // // cl : Command line warning D9025 : overriding '/W3' with '/W4' // // So we try to detect and skip them assuming they will // also show up during the compilation proper. // + // Another possibility is a mis-spelled option that is + // treated as another file to compile, for example: + // + // cl junk /nologo /P /showIncluses /TP foo.cxx + // junk + // foo.cxx + // c1xx: fatal error C1083: Cannot open source file: 'junk': No such file or directory + // + // Yet another possibility is that something went wrong + // even before we could compile anything. + // + // So the plan is to keep going (in the hope of C1083) + // but print the last line if there is no more input. 
+ // if (first) { if (cvariant != "clang") @@ -4346,16 +4605,29 @@ namespace build2 // size_t p (msvc_sense_diag (l, 'D').first); if (p != string::npos && l[p] == '9') - continue; - - text << l; - bad_error = true; - break; + ; // Skip. + else + { + l2 = l; + + if (!bad_error) + { + dbuf.open_eof (args[0]); + bad_error = true; + } + } + + l.clear (); + continue; } + + l2.clear (); + // Fall through. } first = false; + l.clear (); continue; } @@ -4363,8 +4635,13 @@ namespace build2 if (f.empty ()) // Some other diagnostics. { - text << l; - bad_error = true; + if (!bad_error) + { + dbuf.open_eof (args[0]); + bad_error = true; + } + + dbuf.write (l, true /* newline */); break; } @@ -4458,12 +4735,9 @@ namespace build2 if (l.empty () || l[0] != '^' || l[1] != ':' || l[2] != ' ') { - // @@ Hm, we don't seem to redirect stderr to stdout - // for this class of compilers so I wonder why - // we are doing this? - // if (!l.empty ()) - text << l; + l5 ([&]{trace << "invalid header dependency line '" + << l << "'";}); bad_error = true; break; @@ -4478,22 +4752,37 @@ namespace build2 // "^: \". // if (l.size () == 4 && l[3] == '\\') + { + l.clear (); continue; + } else pos = 3; // Skip "^: ". // Fall through to the 'second' block. } - if (second) - { - second = false; - next_make (l, pos); // Skip the source file. - } - while (pos != l.size ()) { - string f (next_make (l, pos)); + string f ( + make_parser::next ( + l, pos, make_parser::type::prereq).first); + + if (pos != l.size () && l[pos] == ':') + { + l5 ([&]{trace << "invalid header dependency line '" + << l << "'";}); + bad_error = true; + break; + } + + // Skip the source file. + // + if (second) + { + second = false; + continue; + } // Skip until where we left off. // @@ -4537,19 +4826,56 @@ namespace build2 } if (bad_error || md.deferred_failure) + { + // Note that it may be tempting to finish reading out the + // diagnostics before bailing out. 
But that may end up in + // a deadlock if the process gets blocked trying to write + // to stdout. + // break; + } + + l.clear (); + } + + // We may bail out early from the above loop in case of a + // restart or error. Which means the stderr stream (dbuf) may + // still be open and we need to close it before closing the + // stdout stream (which may try to skip). + // + // In this case we may also end up with incomplete diagnostics + // so discard it. + // + // Generally, it may be tempting to start thinking if we + // should discard buffered diagnostics in other cases, such as + // restart. But remember that during serial execution it will + // go straight to stderr so for consistency (and simplicity) + // we should just print it unless there are good reasons not + // to (also remember that in the restartable modes we normally + // redirect stderr to /dev/null; see the process startup code + // for details). + // + if (dbuf.is.is_open ()) + { + dbuf.is.close (); + dbuf.buf.clear (); } // Bail out early if we have deferred a failure. // + // Let's ignore any buffered diagnostics in this case since + // it would appear after the deferred failure note. + // if (md.deferred_failure) { is.close (); - return make_pair (auto_rmfile (), false); + return; } - // In case of VC, we are parsing stderr and if things go - // south, we need to copy the diagnostics for the user to see. + // In case of VC, we are parsing redirected stderr and if + // things go south, we need to copy the diagnostics for the + // user to see. Note that we should have already opened dbuf + // at EOF above. // if (bad_error && cclass == compiler_class::msvc) { @@ -4564,7 +4890,7 @@ namespace build2 l.compare (p.first, 4, "1083") != 0 && msvc_header_c1083 (l, p)) { - diag_stream_lock () << l << endl; + dbuf.write (l, true /* newline */); } } } @@ -4587,22 +4913,42 @@ namespace build2 if (pr.wait ()) { - if (!bad_error) // Ignore expected successes (we are done). 
- continue; + { + diag_record dr; + + if (bad_error) + dr << fail << "expected error exit status from " + << x_lang << " compiler"; - fail << "expected error exit status from " << x_lang - << " compiler"; + if (dbuf.is_open ()) + dbuf.close (move (dr)); // Throws if error. + } + + // Ignore expected successes (we are done). + // + if (!restart && psrc) + psrcw.close (); + + continue; } else if (pr.exit->normal ()) { if (good_error) // Ignore expected errors (restart). + { + if (dbuf.is_open ()) + dbuf.close (); + continue; + } } // Fall through. } catch (const io_error& e) { + // Ignore buffered diagnostics (since reading it could be the + // cause of this failure). + // if (pr.wait ()) fail << "unable to read " << x_lang << " compiler header " << "dependency output: " << e; @@ -4611,18 +4957,23 @@ namespace build2 } assert (pr.exit && !*pr.exit); - const process_exit& e (*pr.exit); + const process_exit& pe (*pr.exit); // For normal exit we assume the child process issued some // diagnostics. // - if (e.normal ()) + if (pe.normal ()) { - // If this run was with the generated header support then we - // have issued diagnostics and it's time to give up. + // If this run was with the generated header support then it's + // time to give up. // if (gen) + { + if (dbuf.is_open ()) + dbuf.close (args, pe, 2 /* verbosity */); + throw failed (); + } // Just to recap, being here means something is wrong with the // source: it can be a missing generated header, it can be an @@ -4640,7 +4991,12 @@ namespace build2 // or will issue diagnostics. // if (restart) + { + if (dbuf.is_open ()) + dbuf.close (); + l6 ([&]{trace << "trying again without generated headers";}); + } else { // In some pathological situations we may end up switching @@ -4665,19 +5021,24 @@ namespace build2 // example, because we have removed all the partially // preprocessed source files). 
// - if (force_gen_skip && *force_gen_skip == skip_count) { - diag_record dr (fail); + diag_record dr; + if (force_gen_skip && *force_gen_skip == skip_count) + { + dr << + fail << "inconsistent " << x_lang << " compiler behavior" << + info << "run the following two commands to investigate"; - dr << "inconsistent " << x_lang << " compiler behavior" << - info << "run the following two commands to investigate"; + dr << info; + print_process (dr, args.data ()); // No pipes. - dr << info; - print_process (dr, args.data ()); // No pipes. + init_args ((gen = true)); + dr << info << ""; + print_process (dr, args.data ()); // No pipes. + } - init_args ((gen = true)); - dr << info << ""; - print_process (dr, args.data ()); // No pipes. + if (dbuf.is_open ()) + dbuf.close (move (dr)); // Throws if error. } restart = true; @@ -4688,7 +5049,15 @@ namespace build2 continue; } else - run_finish (args, pr); // Throws. + { + if (dbuf.is_open ()) + { + dbuf.close (args, pe, 2 /* verbosity */); + throw failed (); + } + else + run_finish (args, pr, 2 /* verbosity */); + } } catch (const process_error& e) { @@ -4713,8 +5082,10 @@ namespace build2 // dd.expect (""); - puse = puse && !reprocess && !psrc.path.empty (); - return make_pair (move (psrc), puse); + puse = puse && !reprocess && psrc; + + result.first = move (psrc); + result.second = puse; } // Return the translation unit information (last argument) and its @@ -4726,13 +5097,25 @@ namespace build2 file& t, linfo li, const file& src, - auto_rmfile& psrc, + file_cache::entry& psrc, const match_data& md, const path& dd, unit& tu) const { tracer trace (x, "compile_rule::parse_unit"); + // Scanning .S files with our parser is hazardous since such files + // sometimes use `#`-style comments. Presumably real compilers just + // ignore them in some way, but it doesn't seem worth it to bother in + // our case. Also, the checksum calculation over assembler tokens feels + // iffy. 
+ // + if (x_assembler_cpp (src)) + { + tu.type = unit_type::non_modular; + return ""; + } + otype ot (li.type); // If things go wrong give the user a bit extra context. Let's call it @@ -4777,8 +5160,8 @@ namespace build2 // may extend cc.reprocess to allow specifying where reprocessing is // needed). // - ps = !psrc.path.empty () && !reprocess; - sp = &(ps ? psrc.path : src.path ()); + ps = psrc && !reprocess; + sp = &(ps ? psrc.path () : src.path ()); // VC's preprocessed output, if present, is fully preprocessed. // @@ -4811,8 +5194,6 @@ namespace build2 case compiler_class::msvc: werror = "/WX"; break; } - bool clang (ctype == compiler_type::clang); - append_options (args, t, c_coptions, werror); append_options (args, t, x_coptions, werror); @@ -4825,12 +5206,43 @@ namespace build2 args.push_back ("/nologo"); append_options (args, cmode); - append_sys_inc_options (args); + append_sys_hdr_options (args); + + // Note: no append_diag_color_options() call since the + // diagnostics is discarded. + + // See perform_update() for details on the choice of options. 
+ // + { + bool sc (find_option_prefixes ( + {"/source-charset:", "-source-charset:"}, args)); + bool ec (find_option_prefixes ( + {"/execution-charset:", "-execution-charset:"}, args)); + + if (!sc && !ec) + args.push_back ("/utf-8"); + else + { + if (!sc) + args.push_back ("/source-charset:UTF-8"); - if (x_lang == lang::cxx && !find_option_prefix ("/EH", args)) + if (!ec) + args.push_back ("/execution-charset:UTF-8"); + } + } + + if (cvariant != "clang" && isystem (*this)) + { + if (find_option_prefixes ({"/external:I", "-external:I"}, args) && + !find_option_prefixes ({"/external:W", "-external:W"}, args)) + args.push_back ("/external:W0"); + } + + if (x_lang == lang::cxx && + !find_option_prefixes ({"/EH", "-EH"}, args)) args.push_back ("/EHsc"); - if (!find_option_prefixes ({"/MD", "/MT"}, args)) + if (!find_option_prefixes ({"/MD", "/MT", "-MD", "-MT"}, args)) args.push_back ("/MD"); args.push_back ("/E"); @@ -4844,6 +5256,17 @@ namespace build2 } case compiler_class::gcc: { + append_options (args, cmode); + append_sys_hdr_options (args); + + // Note: no append_diag_color_options() call since the + // diagnostics is discarded. + + // See perform_update() for details on the choice of options. + // + if (!find_option_prefix ("-finput-charset=", args)) + args.push_back ("-finput-charset=UTF-8"); + if (ot == otype::s) { if (tclass == "linux" || tclass == "bsd") @@ -4852,8 +5275,7 @@ namespace build2 if (ctype == compiler_type::clang && tsys == "win32-msvc") { - initializer_list<const char*> os {"-nostdlib", "-nostartfiles"}; - if (!find_options (os, cmode) && !find_options (os, args)) + if (!find_options ({"-nostdlib", "-nostartfiles"}, args)) { args.push_back ("-D_MT"); args.push_back ("-D_DLL"); @@ -4872,10 +5294,6 @@ namespace build2 } } - append_options (args, cmode, - cmode.size () - (modules && clang ? 
1 : 0)); - append_sys_inc_options (args); - args.push_back ("-E"); append_lang_options (args, md); @@ -4884,12 +5302,36 @@ namespace build2 // if (ps) { - if (ctype == compiler_type::gcc) + switch (ctype) { - // Note that only these two *plus* -x do the trick. - // - args.push_back ("-fpreprocessed"); - args.push_back ("-fdirectives-only"); + case compiler_type::gcc: + { + // Note that only these two *plus* -x do the trick. + // + args.push_back ("-fpreprocessed"); + args.push_back ("-fdirectives-only"); + break; + } + case compiler_type::clang: + { + // See below for details. + // + if (ctype == compiler_type::clang && + cmaj >= (cvariant != "apple" ? 15 : 16)) + { + if (find_options ({"-pedantic", "-pedantic-errors", + "-Wpedantic", "-Werror=pedantic"}, + args)) + { + args.push_back ("-Wno-gnu-line-marker"); + } + } + + break; + } + case compiler_type::msvc: + case compiler_type::icc: + assert (false); } } @@ -4917,11 +5359,16 @@ namespace build2 for (;;) // Breakout loop. try { - // Disarm the removal of the preprocessed file in case of an error. - // We re-arm it below. + // If we are compiling the preprocessed output, get its read handle. // - if (ps) - psrc.active = false; + file_cache::read psrcr (ps ? psrc.open () : file_cache::read ()); + + // Temporarily disable the removal of the preprocessed file in case of + // an error. We re-enable it below. + // + bool ptmp (ps && psrc.temporary); + if (ptmp) + psrc.temporary = false; process pr; @@ -4938,10 +5385,10 @@ namespace build2 print_process (args); // We don't want to see warnings multiple times so ignore all - // diagnostics. + // diagnostics (thus no need for diag_buffer). // pr = process (cpath, - args.data (), + args, 0, -1, -2, nullptr, // CWD env.empty () ? 
nullptr : env.data ()); @@ -4953,14 +5400,14 @@ namespace build2 fdstream_mode::binary | fdstream_mode::skip); parser p; - p.parse (is, path_name (*sp), tu); + p.parse (is, path_name (*sp), tu, cid); is.close (); if (pr.wait ()) { - if (ps) - psrc.active = true; // Re-arm. + if (ptmp) + psrc.temporary = true; // Re-enable. unit_type& ut (tu.type); module_info& mi (tu.module_info); @@ -4968,7 +5415,9 @@ namespace build2 if (!modules) { if (ut != unit_type::non_modular || !mi.imports.empty ()) - fail << "modules support required by " << src; + fail << "modules support required by " << src << + info << "consider enabling modules with " + << x << ".features.modules=true in root.build"; } else { @@ -4993,25 +5442,21 @@ namespace build2 ut = md.type; mi.name = src.path ().string (); } - - // Prior to 15.5 (19.12) VC was not using the 'export module M;' - // syntax so we use the preprequisite type to distinguish - // between interface and implementation units. - // - // @@ TMP: probably outdated. - // - if (ctype == compiler_type::msvc && cmaj == 19 && cmin <= 11) - { - if (ut == unit_type::module_impl && src.is_a (*x_mod)) - ut = unit_type::module_intf; - } } // If we were forced to reprocess, assume the checksum is not // accurate (parts of the translation unit could have been // #ifdef'ed out; see __build2_preprocess). // - return reprocess ? string () : move (p.checksum); + // Also, don't use the checksum for header units since it ignores + // preprocessor directives and may therefore cause us to ignore a + // change to an exported macro. @@ TODO: maybe we should add a + // flag to the parser not to waste time calculating the checksum + // in these cases. + // + return reprocess || ut == unit_type::module_header + ? string () + : move (p.checksum); } // Fall through. @@ -5042,7 +5487,7 @@ namespace build2 info << "then run failing command to display compiler diagnostics"; } else - run_finish (args, pr); // Throws. + run_finish (args, pr, 2 /* verbosity */); // Throws. 
} catch (const process_error& e) { @@ -5211,6 +5656,9 @@ namespace build2 { tracer trace (x, "compile_rule::search_modules"); + context& ctx (bs.ctx); + const scope& rs (*bs.root_scope ()); + // NOTE: currently we don't see header unit imports (they are handled by // extract_headers() and are not in imports). @@ -5234,7 +5682,7 @@ namespace build2 // // In the above examples one common theme about all the file names is // that they contain, in one form or another, the "tail" of the module - // name ('core'). So what we are going to do is require that, within a + // name (`core`). So what we are going to do is require that, within a // pool (library, executable), the interface file names contain enough // of the module name tail to unambiguously resolve all the module // imports. On our side we are going to implement a "fuzzy" module name @@ -5246,14 +5694,14 @@ namespace build2 // So, the fuzzy match: the idea is that each match gets a score, the // number of characters in the module name that got matched. A match // with the highest score is used. And we use the (length + 1) for a - // match against an actual module name. + // match against an actual (extracted) module name. // // Actually, the scoring system is a bit more elaborate than that. // Consider module name core.window and two files, window.mxx and // abstract-window.mxx: which one is likely to define this module? // Clearly the first, but in the above-described scheme they will get // the same score. 
More generally, consider these "obvious" (to the - // human) situations: + // human, that is) situations: // // window.mxx vs abstract-window.mxx // details/window.mxx vs abstract-window.mxx @@ -5262,18 +5710,22 @@ namespace build2 // To handle such cases we are going to combine the above primary score // with the following secondary scores (in that order): // - // a) Strength of separation between matched and unmatched parts: + // A) Strength of separation between matched and unmatched parts: // // '\0' > directory separator > other separator > unseparated // // Here '\0' signifies nothing to separate (unmatched part is empty). // - // b) Shortness of the unmatched part. + // B) Shortness of the unmatched part. // - // For std.* modules we only accept non-fuzzy matches (think std.core vs - // some core.mxx). And if such a module is unresolved, then we assume it - // is pre-built and will be found by some other means (e.g., VC's - // IFCPATH). + // Finally, for the fuzzy match we require a complete match of the last + // module (or partition) component. Failing that, we would match `format` + // to `print` because the last character (`t`) is the same. + // + // For std.* modules we only accept non-fuzzy matches (think std.compat + // vs some compat.mxx). And if such a module is unresolved, then we + // assume it is pre-built and will be found by some other means (e.g., + // VC's IFCPATH). // // Note also that we handle module partitions the same as submodules. In // other words, for matching, `.` and `:` are treated the same. @@ -5285,8 +5737,12 @@ namespace build2 // // PPPPABBBB // - // We use decimal instead of binary packing to make it easier to - // separate fields in the trace messages, during debugging, etc. + // Where PPPP is the primary score, A is the A) score, and BBBB is + // the B) score described above. Zero signifies no match.
+ // + // We use decimal instead of binary packing to make it easier for the + // human to separate fields in the trace messages, during debugging, + // etc. // return m.size () * 100000 + 99999; // Maximum match score. }; @@ -5309,6 +5765,8 @@ namespace build2 (ucase (c1) == c1) != (ucase (c2) == c2)); }; + auto mod_sep = [] (char c) {return c == '.' || c == ':';}; + size_t fn (f.size ()), fi (fn); size_t mn (m.size ()), mi (mn); @@ -5318,6 +5776,10 @@ namespace build2 bool fsep (false); bool msep (false); + // We require complete match of at least last module component. + // + bool match (false); + // Scan backwards for as long as we match. Keep track of the previous // character for case change detection. // @@ -5343,11 +5805,12 @@ namespace build2 // FOObar // bool fs (char_sep (fc)); - bool ms (mc == '_' || mc == '.' || mc == ':'); + bool ms (mod_sep (mc) || mc == '_'); if (fs && ms) { fsep = msep = true; + match = match || mod_sep (mc); continue; } @@ -5365,6 +5828,7 @@ namespace build2 if (fa) {++fi; msep = true;} if (ma) {++mi; fsep = true;} + match = match || mod_sep (mc); continue; } } @@ -5372,6 +5836,39 @@ namespace build2 break; // No match. } + // Deal with edge cases: complete module match and complete file + // match. + // + match = match || mi == 0 || (fi == 0 && mod_sep (m[mi - 1])); + + if (!match) + return 0; + + // Here is another corner case, the module is async_simple:IOExecutor + // and the file names are: + // + // IOExecutor.mxx + // SimpleIOExecutor.mxx + // + // The above implementation treats the latter as better because + // `Simple` in SimpleIOExecutor matches `simple` in async_simple. It's + // unclear what we can do about it without potentially breaking other + // legitimate cases (think Boost_Simple:IOExecutor). Maybe we could + // boost the exact partition name match score, similar to the exact + // module match, as some sort of a heuristics? Let's try. 
+ // + if (fi == 0 && mi != 0 && m[mi - 1] == ':') + { + // Pretend we matched one short of the next module component. This + // way AsyncSimpleIOExecutor.mxx would still be a better match. + // + while (--mi != 0 && m[mi - 1] != '.') + ; + + msep = (mi != 0); // For uncount logic below. + mi++; // One short. + } + // "Uncount" real separators. // if (fsep) fi++; @@ -5400,6 +5897,20 @@ namespace build2 return ps * 100000 + as * 10000 + bs; }; +#if 0 + assert (match ("IOExecutor", "async_simple:IOExecutor") > + match ("SimpleIOExecutor", "async_simple:IOExecutor")); + + assert (match ("IOExecutor", "async_simple:IOExecutor") < + match ("AsyncSimpleIOExecutor", "async_simple:IOExecutor")); + + assert (match ("IOExecutor", "x.async_simple:IOExecutor") > + match ("SimpleIOExecutor", "x.async_simple:IOExecutor")); + + assert (match ("IOExecutor", "x.async_simple:IOExecutor") < + match ("AsyncSimpleIOExecutor", "x.async_simple:IOExecutor")); +#endif + auto& pts (t.prerequisite_targets[a]); size_t start (pts.size ()); // Index of the first to be added. @@ -5414,7 +5925,7 @@ namespace build2 // promise. It has to do with module re-exporting (export import M;). // In this case (currently) all implementations simply treat it as a // shallow (from the BMI's point of view) reference to the module (or an - // implicit import, if you will). Do you see where it's going? Nowever + // implicit import, if you will). Do you see where it's going? Nowhere // good, that's right. This shallow reference means that the compiler // should be able to find BMIs for all the re-exported modules, // recursively. The good news is we are actually in a pretty good shape @@ -5431,10 +5942,11 @@ namespace build2 // 1. There is no good place in prerequisite_targets to store the // exported flag (no, using the marking facility across match/execute // is a bad idea). So what we are going to do is put re-exported - // bmi{}s at the back and store (in the target's data pad) the start - // position. 
One bad aspect about this part is that we assume those - // bmi{}s have been matched by the same rule. But let's not kid - // ourselves, there will be no other rule that matches bmi{}s. + // bmi{}s at the back and store (in the target's auxiliary data + // storage) the start position. One bad aspect about this part is + // that we assume those bmi{}s have been matched by the same + // rule. But let's not kid ourselves, there will be no other rule + // that matches bmi{}s. // // @@ I think now we could use prerequisite_targets::data for this? // @@ -5459,6 +5971,7 @@ namespace build2 // so we actually don't need to pass any extra options (unless things // get moved) but they still need access to the BMIs (and things will // most likely have to be done differently for distributed compilation). + // @@ Note: no longer the case for Clang either. // // So the revised plan: on the off chance that some implementation will // do it differently we will continue maintaining the imported/re-exported @@ -5552,6 +6065,8 @@ namespace build2 continue; // Scan the rest to detect if all done. } } + else + assert (name != m.name); // No duplicates. done = false; } @@ -5559,159 +6074,372 @@ namespace build2 return r; }; - for (prerequisite_member p: group_prerequisite_members (a, t)) + // Find the module in prerequisite targets of a library (recursively) + // seeing through libu*{}. Note: sets the `done` flag. See similar + // logic in pkgconfig_save(). + // + auto find = [a, &bs, this, + &check_exact, &done] (const file& l, + const auto& find) -> void { - if (include (a, t, p) != include_type::normal) // Excluded/ad hoc. - continue; - - const target* pt (p.load ()); // Should be cached for libraries.
- - if (pt != nullptr) + for (const target* pt: l.prerequisite_targets[a]) { - const file* lt (nullptr); - - if (const libx* l = pt->is_a<libx> ()) - lt = link_member (*l, a, li); - else if (pt->is_a<liba> () || pt->is_a<libs> () || pt->is_a<libux> ()) - lt = &pt->as<file> (); + if (pt == nullptr) + continue; - // If this is a library, check its bmi{}s and mxx{}s. + // Note that here we (try) to use whatever flavor of bmi*{} is + // available. // - if (lt != nullptr) + // @@ MOD: BMI compatibility check. + // + if (pt->is_a<bmix> ()) { - for (const target* bt: lt->prerequisite_targets[a]) + // If the extraction of the module information for this BMI failed + // and we have deferred failure to compiler diagnostics, then + // there will be no module name assigned. It would have been + // better to make sure that's the cause, but that won't be easy. + // + const string* n (cast_null<string> ( + pt->state[a].vars[c_module_name])); + if (n != nullptr) { - if (bt == nullptr) - continue; + if (const target** p = check_exact (*n)) + *p = pt; + } + } + else if (pt->is_a (*x_mod)) + { + // This is an installed library with a list of module sources (the + // source are specified as prerequisites but the fallback file + // rule puts them into prerequisite_targets for us). + // + // The module names should be specified but if not assume + // something else is going on (like a deferred failure) and + // ignore. + // + // Note also that besides modules, prerequisite_targets may + // contain libraries which are interface dependencies of this + // library and which may be called to resolve its module + // dependencies. + // + const string* n (cast_null<string> (pt->vars[c_module_name])); - // Note that here we (try) to use whatever flavor of bmi*{} is - // available. - // - // @@ MOD: BMI compatibility check. - // @@ UTL: we need to (recursively) see through libu*{} (and - // also in pkgconfig_save()). 
+ if (n == nullptr) + continue; + + if (const target** p = check_exact (*n)) + { + // It seems natural to build a BMI type that corresponds to the + // library type. After all, this is where the object file part + // of the BMI is going to come from (unless it's a module + // interface-only library). // - if (bt->is_a<bmix> ()) + *p = &this->make_module_sidebuild ( + a, bs, &l, link_type (l).type, *pt, *n).first; // GCC 4.9 + } + } + // Note that in prerequisite targets we will have the libux{} + // members, not the group. + // + else if (const libux* pl = pt->is_a<libux> ()) + find (*pl, find); + else + continue; + + if (done) + break; + } + }; + + // Pre-resolve std modules in an ad hoc way for certain compilers. + // + // @@ TODO: cache x_stdlib value. + // + if ((ctype == compiler_type::msvc) || + (ctype == compiler_type::clang && + cmaj >= 17 && + cast<string> (rs[x_stdlib]) == "libc++")) + { + // Similar logic to check_exact() above. + // + done = true; + + for (size_t i (0); i != n; ++i) + { + module_import& m (imports[i]); + + if (m.name == "std" || m.name == "std.compat") + { + otype ot (otype::e); + const target* mt (nullptr); + + switch (ctype) + { + case compiler_type::clang: { - const string& n ( - cast<string> (bt->state[a].vars[c_module_name])); + if (m.name != "std") + fail << "module " << m.name << " not yet provided by libc++"; - if (const target** p = check_exact (n)) - *p = bt; + // Find or insert std.cppm (similar code to pkgconfig.cxx). + // + // Note: build_install_data is absolute and normalized. + // + mt = &ctx.targets.insert_locked ( + *x_mod, + (dir_path (build_install_data) /= "libbuild2") /= "cc", + dir_path (), + "std", + string ("cppm"), // For C++14 during bootstrap. + target_decl::implied, + trace).first; + + // Which output type should we use, static or shared? The + // correct way would be to detect whether static or shared + // version of libc++ is to be linked and use the corresponding + // type. 
And we could do that by looking for -static-libstdc++ + // in loption (and no, it's not -static-libc++). + // + // But, looking at the object file produced from std.cppm, it + // only contains one symbol, the static object initializer. + // And this is unlikely to change since all other non-inline + // or template symbols should be in libc++. So feels like it's + // not worth the trouble and one variant should be good enough + // for both cases. Let's use the shared one for less + // surprising diagnostics (as in, "why are you linking obje{} + // to a shared library?") + // + // (Of course, theoretically, std.cppm could detect via a + // macro whether it's being compiled with -fPIC or not and do + // things differently, but this seems far-fetched). + // + ot = otype::s; + + break; } - else if (bt->is_a (*x_mod)) + case compiler_type::msvc: { - // This is an installed library with a list of module sources - // (the source are specified as prerequisites but the fallback - // file rule puts them into prerequisite_targets for us). + // For MSVC, the source files std.ixx and std.compat.ixx are + // found in the modules/ subdirectory which is a sibling of + // include/ in the MSVC toolset (and "that is a contract with + // customers" to quote one of the developers). // - // The module names should be specified but if not assume - // something else is going on and ignore. + // The problem of course is that there are multiple system + // header search directories (for example, as specified in the + // INCLUDE environment variable) and which one of them is for + // the MSVC toolset is not specified. So what we are going to + // do is search for one of the well-known standard C++ headers + // and assume that the directory where we found it is the one + // we are looking for. Or we could look for something + // MSVC-specific like vcruntime.h. 
+ // + dir_path modules; + if (optional<path> p = find_system_header (path ("vcruntime.h"))) + { + p->make_directory (); // Strip vcruntime.h. + if (p->leaf () == path ("include")) // Sanity check. + { + modules = path_cast<dir_path> (move (p->make_directory ())); + modules /= "modules"; + } + } + + if (modules.empty ()) + fail << "unable to locate MSVC standard modules directory"; + + mt = &ctx.targets.insert_locked ( + *x_mod, + move (modules), + dir_path (), + m.name, + string ("ixx"), // For C++14 during bootstrap. + target_decl::implied, + trace).first; + + // For MSVC it's easier to detect the runtime being used since + // it's specified with the compile options (/MT[d], /MD[d]). // - // Note also that besides modules, prerequisite_targets may - // contain libraries which are interface dependencies of this - // library and which may be called to resolve its module - // dependencies. + // Similar semantics as in extract_headers() except here we + // use options visible from the root scope. Note that + // find_option_prefixes() looks in reverse, so look in the + // cmode, x_coptions, c_coptions order. // - const string* n (cast_null<string> (bt->vars[c_module_name])); + initializer_list<const char*> os {"/MD", "/MT", "-MD", "-MT"}; - if (n == nullptr) - continue; + const string* o; + if ((o = find_option_prefixes (os, cmode)) != nullptr || + (o = find_option_prefixes (os, rs, x_coptions)) != nullptr || + (o = find_option_prefixes (os, rs, c_coptions)) != nullptr) + { + ot = (*o)[2] == 'D' ? otype::s : otype::a; + } + else + ot = otype::s; // The default is /MD. 
- if (const target** p = check_exact (*n)) - *p = &make_module_sidebuild (a, bs, *lt, *bt, *n); + break; } - else - continue; + case compiler_type::gcc: + case compiler_type::icc: + assert (false); + }; - if (done) - break; - } + pair<target&, ulock> tl ( + this->make_module_sidebuild ( // GCC 4.9 + a, bs, nullptr, ot, *mt, m.name)); - if (done) - break; + if (tl.second.owns_lock ()) + { + // Special compile options for the std modules. + // + if (ctype == compiler_type::clang) + { + value& v (tl.first.append_locked (x_coptions)); - continue; - } + if (v.null) + v = strings {}; - // Fall through. - } + strings& cops (v.as<strings> ()); - // While it would have been even better not to search for a target, we - // need to get hold of the corresponding mxx{} (unlikely but possible - // for bmi{} to have a different name). - // - // While we want to use group_prerequisite_members() below, we cannot - // call resolve_group() since we will be doing it "speculatively" for - // modules that we may use but also for modules that may use us. This - // quickly leads to deadlocks. So instead we are going to perform an - // ad hoc group resolution. - // - const target* pg; - if (p.is_a<bmi> ()) - { - pg = pt != nullptr ? pt : &p.search (t); - pt = &search (t, btt, p.key ()); // Same logic as in picking obj*{}. - } - else if (p.is_a (btt)) - { - pg = &search (t, bmi::static_type, p.key ()); - if (pt == nullptr) pt = &p.search (t); + switch (ctype) + { + case compiler_type::clang: + { + cops.push_back ("-Wno-reserved-module-identifier"); + break; + } + case compiler_type::msvc: + // It appears nothing special is needed to compile MSVC + // standard modules. + case compiler_type::gcc: + case compiler_type::icc: + assert (false); + }; + } + + tl.second.unlock (); + } + + pts[start + i].target = &tl.first; + m.score = match_max (m.name) + 1; + continue; // Scan the rest to detect if all done. 
+ } + + done = false; } - else - continue; + } - // Find the mxx{} prerequisite and extract its "file name" for the - // fuzzy match unless the user specified the module name explicitly. - // - for (prerequisite_member p: - prerequisite_members (a, t, group_prerequisites (*pt, pg))) + // Go over prerequisites and try to resolve imported modules with them. + // + if (!done) + { + for (prerequisite_member p: group_prerequisite_members (a, t)) { if (include (a, t, p) != include_type::normal) // Excluded/ad hoc. continue; - if (p.is_a (*x_mod)) + const target* pt (p.load ()); // Should be cached for libraries. + + if (pt != nullptr) { - // Check for an explicit module name. Only look for an existing - // target (which means the name can only be specified on the - // target itself, not target type/pattern-spec). + const file* lt (nullptr); + + if (const libx* l = pt->is_a<libx> ()) + lt = link_member (*l, a, li); + else if (pt->is_a<liba> () || + pt->is_a<libs> () || + pt->is_a<libux> ()) + lt = &pt->as<file> (); + + // If this is a library, check its bmi{}s and mxx{}s. // - const target* t (p.search_existing ()); - const string* n (t != nullptr - ? cast_null<string> (t->vars[c_module_name]) - : nullptr); - if (n != nullptr) + if (lt != nullptr) { - if (const target** p = check_exact (*n)) - *p = pt; + find (*lt, find); + + if (done) + break; + + continue; } - else + + // Fall through. + } + + // While it would have been even better not to search for a target, + // we need to get hold of the corresponding mxx{} (unlikely but + // possible for bmi{} to have a different name). + // + // While we want to use group_prerequisite_members() below, we + // cannot call resolve_group() since we will be doing it + // "speculatively" for modules that we may use but also for modules + // that may use us. This quickly leads to deadlocks. So instead we + // are going to perform an ad hoc group resolution. + // + const target* pg; + if (p.is_a<bmi> ()) + { + pg = pt != nullptr ? 
pt : &p.search (t); + pt = &search (t, btt, p.key ()); // Same logic as in picking obj*{}. + } + else if (p.is_a (btt)) + { + pg = &search (t, bmi::static_type, p.key ()); + if (pt == nullptr) pt = &p.search (t); + } + else + continue; + + // Find the mxx{} prerequisite and extract its "file name" for the + // fuzzy match unless the user specified the module name explicitly. + // + for (prerequisite_member p: + prerequisite_members (a, t, group_prerequisites (*pt, pg))) + { + if (include (a, t, p) != include_type::normal) // Excluded/ad hoc. + continue; + + if (p.is_a (*x_mod)) { - // Fuzzy match. + // Check for an explicit module name. Only look for an existing + // target (which means the name can only be specified on the + // target itself, not target type/pattern-spec). // - string f; + const target* mt (p.search_existing ()); + const string* n (mt != nullptr + ? cast_null<string> (mt->vars[c_module_name]) + : nullptr); + if (n != nullptr) + { + if (const target** p = check_exact (*n)) + *p = pt; + } + else + { + // Fuzzy match. + // + string f; - // Add the directory part if it is relative. The idea is to - // include it into the module match, say hello.core vs - // hello/mxx{core}. - // - // @@ MOD: Why not for absolute? Good question. What if it - // contains special components, say, ../mxx{core}? - // - const dir_path& d (p.dir ()); + // Add the directory part if it is relative. The idea is to + // include it into the module match, say hello.core vs + // hello/mxx{core}. + // + // @@ MOD: Why not for absolute? Good question. What if it + // contains special components, say, ../mxx{core}? + // + const dir_path& d (p.dir ()); - if (!d.empty () && d.relative ()) - f = d.representation (); // Includes trailing slash. + if (!d.empty () && d.relative ()) + f = d.representation (); // Includes trailing slash. 
- f += p.name (); - check_fuzzy (pt, f); + f += p.name (); + check_fuzzy (pt, f); + } + break; } - break; } - } - if (done) - break; + if (done) + break; + } } // Diagnose unresolved modules. @@ -5739,8 +6467,13 @@ namespace build2 // // But at this stage this doesn't seem worth the trouble. // - fail (relative (src)) << "unable to resolve module " - << imports[i].name; + fail (relative (src)) + << "unable to resolve module " << imports[i].name << + info << "verify module interface is listed as a prerequisite, " + << "otherwise" << + info << "consider adjusting module interface file names or" << + info << "consider specifying module name with " << x + << ".module_name"; } } } @@ -5776,9 +6509,12 @@ namespace build2 if (m.score <= match_max (in)) { - const string& mn (cast<string> (bt->state[a].vars[c_module_name])); + // As above (deffered failure). + // + const string* mn ( + cast_null<string> (bt->state[a].vars[c_module_name])); - if (in != mn) + if (mn != nullptr && in != *mn) { // Note: matched, so the group should be resolved. // @@ -5792,7 +6528,7 @@ namespace build2 fail (relative (src)) << "failed to correctly guess module name from " << p << info << "guessed: " << in << - info << "actual: " << mn << + info << "actual: " << *mn << info << "consider adjusting module interface file names or" << info << "consider specifying module name with " << x << ".module_name"; @@ -5803,11 +6539,11 @@ namespace build2 // Hash (we know it's a file). // - cs.append (static_cast<const file&> (*bt).path ().string ()); + cs.append (bt->as<file> ().path ().string ()); // Copy over bmi{}s from our prerequisites weeding out duplicates. // - if (size_t j = bt->data<match_data> ().modules.start) + if (size_t j = bt->data<match_data> (a).modules.start) { // Hard to say whether we should reserve or not. We will probably // get quite a bit of duplications. @@ -5820,26 +6556,29 @@ namespace build2 if (et == nullptr) continue; // Unresolved (std.*). 
- const string& mn (cast<string> (et->state[a].vars[c_module_name])); + // As above (deferred failure). + // + const string* mn (cast_null<string> (et->state[a].vars[c_module_name])); - if (find_if (imports.begin (), imports.end (), - [&mn] (const module_import& i) + if (mn != nullptr && + find_if (imports.begin (), imports.end (), + [mn] (const module_import& i) { - return i.name == mn; + return i.name == *mn; }) == imports.end ()) { pts.push_back (et); - cs.append (static_cast<const file&> (*et).path ().string ()); + cs.append (et->as<file> ().path ().string ()); // Add to the list of imports for further duplicate suppression. // We could have stored reference to the name (e.g., in score) // but it's probably not worth it if we have a small string // optimization. // - import_type t (mn.find (':') != string::npos + import_type t (mn->find (':') != string::npos ? import_type::module_part : import_type::module_intf); - imports.push_back (module_import {t, mn, true, 0}); + imports.push_back (module_import {t, *mn, true, 0}); } } } @@ -5859,7 +6598,11 @@ namespace build2 // Find or create a modules sidebuild subproject returning its root // directory. // - dir_path compile_rule:: + // @@ Could we omit creating a subproject if the sidebuild scope is the + // project scope itself? This would speed up simple examples (and + // potentially direct compilation that we may support). + // + pair<dir_path, const scope&> compile_rule:: find_modules_sidebuild (const scope& rs) const { context& ctx (rs.ctx); @@ -5869,6 +6612,9 @@ namespace build2 // cc.config module and that is within our amalgmantion seems like a // good place. // + // @@ TODO: maybe we should cache this in compile_rule ctor like we + // do for the header cache? + // const scope* as (&rs); { const scope* ws (as->weak_scope ()); @@ -5884,7 +6630,7 @@ namespace build2 // This is also the module that registers the scope operation // callback that cleans up the subproject. 
// - if (cast_false<bool> ((*s)["cc.core.vars.loaded"])) + if (cast_false<bool> (s->vars["cc.core.vars.loaded"])) as = s; } while (s != ws); @@ -5901,7 +6647,7 @@ namespace build2 module_build_modules_dir /= x); - const scope* ps (&ctx.scopes.find (pd)); + const scope* ps (&ctx.scopes.find_out (pd)); if (ps->out_path () != pd) { @@ -5912,7 +6658,7 @@ namespace build2 // Re-test again now that we are in exclusive phase (another thread // could have already created and loaded the subproject). // - ps = &ctx.scopes.find (pd); + ps = &ctx.scopes.find_out (pd); if (ps->out_path () != pd) { @@ -5957,16 +6703,21 @@ namespace build2 assert (m != nullptr && m->modules); #endif - return pd; + return pair<dir_path, const scope&> (move (pd), *as); } - // Synthesize a dependency for building a module binary interface on - // the side. + // Synthesize a dependency for building a module binary interface of a + // library on the side. If library is missing, then assume it's some + // ad hoc/system library case (in which case we assume it's binless, + // for now). // - const file& compile_rule:: + // The return value semantics is as in target_set::insert_locked(). + // + pair<target&, ulock> compile_rule:: make_module_sidebuild (action a, const scope& bs, - const file& lt, + const file* lt, + otype ot, const target& mt, const string& mn) const { @@ -5974,7 +6725,7 @@ namespace build2 // Note: see also make_header_sidebuild() below. - dir_path pd (find_modules_sidebuild (*bs.root_scope ())); + dir_path pd (find_modules_sidebuild (*bs.root_scope ()).first); // We need to come up with a file/target name that will be unique enough // not to conflict with other modules. If we assume that within an @@ -5987,24 +6738,20 @@ namespace build2 back_inserter (mf), [] (char c) {return c == '.' ? '-' : c == ':' ? '+' : c;}); - // It seems natural to build a BMI type that corresponds to the library - // type. 
After all, this is where the object file part of the BMI is - // going to come from (unless it's a module interface-only library). - // - const target_type& tt (compile_types (link_type (lt).type).bmi); + const target_type& tt (compile_types (ot).bmi); // Store the BMI target in the subproject root. If the target already // exists then we assume all this is already done (otherwise why would // someone have created such a target). // - if (const file* bt = bs.ctx.targets.find<file> ( + if (const target* bt = bs.ctx.targets.find ( tt, pd, dir_path (), // Always in the out tree. mf, nullopt, // Use default extension. trace)) - return *bt; + return pair<target&, ulock> (const_cast<target&> (*bt), ulock ()); prerequisites ps; ps.push_back (prerequisite (mt)); @@ -6017,24 +6764,22 @@ namespace build2 // // Note: lt is matched and so the group is resolved. // - ps.push_back (prerequisite (lt)); - for (prerequisite_member p: group_prerequisite_members (a, lt)) + if (lt != nullptr) { - if (include (a, lt, p) != include_type::normal) // Excluded/ad hoc. - continue; - - // @@ TODO: will probably need revision if using sidebuild for - // non-installed libraries (e.g., direct BMI dependencies - // will probably have to be translated to mxx{} or some such). - // Hm, don't think we want it this way: we want BMIs of binless - // library to be built in the library rather than on the side - // (so they can be potentially re-used by multiple independent - // importers). - // - if (p.is_a<libx> () || - p.is_a<liba> () || p.is_a<libs> () || p.is_a<libux> ()) + ps.push_back (prerequisite (*lt)); + for (prerequisite_member p: group_prerequisite_members (a, *lt)) { - ps.push_back (p.as_prerequisite ()); + // Ignore update=match. + // + lookup l; + if (include (a, *lt, p, &l) != include_type::normal) // Excluded/ad hoc. 
+ continue; + + if (p.is_a<libx> () || + p.is_a<liba> () || p.is_a<libs> () || p.is_a<libux> ()) + { + ps.push_back (p.as_prerequisite ()); + } } } @@ -6045,31 +6790,33 @@ namespace build2 move (mf), nullopt, // Use default extension. target_decl::implied, - trace)); - file& bt (static_cast<file&> (p.first)); + trace, + true /* skip_find */)); // Note that this is racy and someone might have created this target // while we were preparing the prerequisite list. // if (p.second) { - bt.prerequisites (move (ps)); + p.first.prerequisites (move (ps)); // Unless this is a binless library, we don't need the object file // (see config_data::b_binless for details). // - bt.vars.assign (b_binless) = (lt.mtime () == timestamp_unreal); + p.first.vars.assign (b_binless) = (lt == nullptr || + lt->mtime () == timestamp_unreal); } - return bt; + return p; } // Synthesize a dependency for building a header unit binary interface on // the side. // const file& compile_rule:: - make_header_sidebuild (action, + make_header_sidebuild (action a, const scope& bs, + const file& t, linfo li, const file& ht) const { @@ -6077,7 +6824,116 @@ namespace build2 // Note: similar to make_module_sidebuild() above. - dir_path pd (find_modules_sidebuild (*bs.root_scope ())); + auto sb (find_modules_sidebuild (*bs.root_scope ())); + dir_path pd (move (sb.first)); + const scope& as (sb.second); + + // Determine if this header belongs to one of the libraries we depend + // on. + // + // Note that because libraries are not in prerequisite_targets, we have + // to go through prerequisites, similar to append_library_options(). + // + const target* lt (nullptr); // Can be lib{}. + { + // Note that any such library would necessarily be an interface + // dependency so we never need to go into implementations. + // + auto imp = [] (const target&, bool) { return false; }; + + // The same logic as in append_libraries(). 
+ // + appended_libraries ls; + struct data + { + action a; + const file& ht; + const target*& lt; + appended_libraries& ls; + } d {a, ht, lt, ls}; + + auto lib = [&d] ( + const target* const* lc, + const small_vector<reference_wrapper<const string>, 2>&, + lflags, + const string*, + bool) + { + // Prune any further traversal if we already found it. + // + if (d.lt != nullptr) + return false; + + const target* l (lc != nullptr ? *lc : nullptr); // Can be lib{}. + + if (l == nullptr) + return true; + + // Suppress duplicates. + // + if (find (d.ls.begin (), d.ls.end (), l) != d.ls.end ()) + return false; + + // Feels like we should only consider non-utility libraries with + // utilities being treated as "direct" use. + // + if (l->is_a<libux> ()) + return true; + + // Since the library is searched and matched, all the headers should + // be in prerequisite_targets. + // + const auto& pts (l->prerequisite_targets[d.a]); + if (find (pts.begin (), pts.end (), &d.ht) != pts.end ()) + { + d.lt = l; + return false; + } + + d.ls.push_back (l); + return true; + }; + + library_cache lib_cache; + for (prerequisite_member p: group_prerequisite_members (a, t)) + { + if (include (a, t, p) != include_type::normal) // Excluded/ad hoc. + continue; + + // Should be already searched and matched for libraries. + // + if (const target* pt = p.load ()) + { + if (const libx* l = pt->is_a<libx> ()) + pt = link_member (*l, a, li); + + bool la; + const file* f; + if ((la = (f = pt->is_a<liba> ())) || + (la = (f = pt->is_a<libux> ())) || + ( (f = pt->is_a<libs> ()))) + { + // Note that we are requesting process_libraries() to not pick + // the liba/libs{} member of the installed libraries and return + // the lib{} group itself instead. This is because, for the + // installed case, the library prerequisites (both headers and + // interface dependency libraries) are matched by file_rule + // which won't pick the liba/libs{} member (naturally) but will + // just match the lib{} group. 
+ // + process_libraries (a, bs, nullopt, sys_lib_dirs, + *f, la, 0, // lflags unused. + imp, lib, nullptr, + true /* self */, + false /* proc_opt_group */, + &lib_cache); + + if (lt != nullptr) + break; + } + } + } + } // What should we use as a file/target name? On one hand we want it // unique enough so that <stdio.h> and <custom/stdio.h> don't end up @@ -6102,7 +6958,14 @@ namespace build2 mf += sha256 (hp.string ()).abbreviated_string (12); } - const target_type& tt (compile_types (li.type).hbmi); + // If the header comes from the library, use its hbmi?{} type to + // maximize reuse. + // + const target_type& tt ( + compile_types ( + lt != nullptr && !lt->is_a<lib> () + ? link_type (*lt).type + : li.type).hbmi); if (const file* bt = bs.ctx.targets.find<file> ( tt, @@ -6116,6 +6979,51 @@ namespace build2 prerequisites ps; ps.push_back (prerequisite (ht)); + // Similar story as for modules: the header may need poptions from its + // library (e.g., -I to find other headers that it includes). + // + if (lt != nullptr) + ps.push_back (prerequisite (*lt)); + else + { + // If the header does not belong to a library then this is a "direct" + // use, for example, by an exe{} target. In this case we need to add + // all the prerequisite libraries as well as scope p/coptions (in a + // sense, we are trying to approximate how all the sources that would + // typically include such a header are build). + // + // Note that this is also the case when we build the library's own + // sources (in a way it would have been cleaner to always build + // library's headers with only its "interface" options/prerequisites + // but that won't be easy to achieve). + // + // Note also that at first it might seem like a good idea to + // incorporate this information into the hash we use to form the BMI + // name. But that would reduce sharing of the BMI. 
For example, that + // would mean we will build the library header twice, once with the + // implementation options/prerequisites and once -- with interface. + // On the other hand, importable headers are expected to be "modular" + // and should probably not depend on any of the implementation + // options/prerequisites (though one could conceivably build a + // "richer" BMI if it is also to be used to build the library + // implementation -- interesting idea). + // + for (prerequisite_member p: group_prerequisite_members (a, t)) + { + // Ignore update=match. + // + lookup l; + if (include (a, t, p, &l) != include_type::normal) // Excluded/ad hoc. + continue; + + if (p.is_a<libx> () || + p.is_a<liba> () || p.is_a<libs> () || p.is_a<libux> ()) + { + ps.push_back (p.as_prerequisite ()); + } + } + } + auto p (bs.ctx.targets.insert_locked ( tt, move (pd), @@ -6123,22 +7031,47 @@ namespace build2 move (mf), nullopt, // Use default extension. target_decl::implied, - trace)); - file& bt (static_cast<file&> (p.first)); + trace, + true /* skip_find */)); + file& bt (p.first.as<file> ()); // Note that this is racy and someone might have created this target // while we were preparing the prerequisite list. // if (p.second) + { bt.prerequisites (move (ps)); + // Add the p/coptions from our scope in case of a "direct" use. Take + // into account hbmi{} target-type/pattern values to allow specifying + // hbmi-specific options. + // + if (lt == nullptr) + { + auto set = [&bs, &as, &tt, &bt] (const variable& var) + { + // Avoid duplicating the options if they are from the same + // amalgamation as the sidebuild. + // + lookup l (bs.lookup (var, tt, bt.name, hbmi::static_type, bt.name)); + if (l.defined () && !l.belongs (as)) + bt.assign (var) = *l; + }; + + set (c_poptions); + set (x_poptions); + set (c_coptions); + set (x_coptions); + } + } + return bt; } // Filter cl.exe noise (msvc.cxx). 
// void - msvc_filter_cl (ifdstream&, const path& src); + msvc_filter_cl (diag_buffer&, const path& src); // Append header unit-related options. // @@ -6189,7 +7122,7 @@ namespace build2 // options). // void compile_rule:: - append_module_options (environment& env, + append_module_options (environment&, cstrings& args, small_vector<string, 2>& stor, action a, @@ -6200,8 +7133,6 @@ namespace build2 unit_type ut (md.type); const module_positions& ms (md.modules); - dir_path stdifc; // See the VC case below. - switch (ctype) { case compiler_type::gcc: @@ -6230,15 +7161,12 @@ namespace build2 if (ms.start == 0) return; - // Clang embeds module file references so we only need to specify - // our direct imports. - // - // If/when we get the ability to specify the mapping in a file, we - // will pass the whole list. + // If/when we get the ability to specify the mapping in a file. // #if 0 // In Clang the module implementation's unit .pcm is special and - // must be "loaded". + // must be "loaded". Note: not anymore, not from Clang 16 and is + // deprecated in 17. // if (ut == unit_type::module_impl) { @@ -6255,10 +7183,7 @@ namespace build2 stor.push_back (move (s)); #else auto& pts (t.prerequisite_targets[a]); - for (size_t i (ms.start), - n (ms.copied != 0 ? ms.copied : pts.size ()); - i != n; - ++i) + for (size_t i (ms.start), n (pts.size ()); i != n; ++i) { const target* pt (pts[i]); @@ -6271,17 +7196,9 @@ namespace build2 const file& f (pt->as<file> ()); string s (relative (f.path ()).string ()); - // In Clang the module implementation's unit .pcm is special and - // must be "loaded". 
- // - if (ut == unit_type::module_impl && i == ms.start) - s.insert (0, "-fmodule-file="); - else - { - s.insert (0, 1, '='); - s.insert (0, cast<string> (f.state[a].vars[c_module_name])); - s.insert (0, "-fmodule-file="); - } + s.insert (0, 1, '='); + s.insert (0, cast<string> (f.state[a].vars[c_module_name])); + s.insert (0, "-fmodule-file="); stor.push_back (move (s)); } @@ -6293,10 +7210,11 @@ namespace build2 if (ms.start == 0) return; + // MSVC requires a transitive set of interfaces, including + // implementation partitions. + // auto& pts (t.prerequisite_targets[a]); - for (size_t i (ms.start), n (pts.size ()); - i != n; - ++i) + for (size_t i (ms.start), n (pts.size ()); i != n; ++i) { const target* pt (pts[i]); @@ -6307,34 +7225,14 @@ namespace build2 // of these are bmi's. // const file& f (pt->as<file> ()); + string s (relative (f.path ()).string ()); - // In VC std.* modules can only come from a single directory - // specified with the IFCPATH environment variable or the - // /module:stdIfcDir option. - // - if (std_module (cast<string> (f.state[a].vars[c_module_name]))) - { - dir_path d (f.path ().directory ()); + s.insert (0, 1, '='); + s.insert (0, cast<string> (f.state[a].vars[c_module_name])); - if (stdifc.empty ()) - { - // Go one directory up since /module:stdIfcDir will look in - // either Release or Debug subdirectories. Keeping the result - // absolute feels right. - // - stor.push_back ("/module:stdIfcDir"); - stor.push_back (d.directory ().string ()); - stdifc = move (d); - } - else if (d != stdifc) // Absolute and normalized. - fail << "multiple std.* modules in different directories"; - } - else - { - stor.push_back ("/module:reference"); - stor.push_back (relative (f.path ()).string ()); - } + stor.push_back (move (s)); } + break; } case compiler_type::icc: @@ -6345,35 +7243,20 @@ namespace build2 // into storage? Because of potential reallocations. 
// for (const string& a: stor) - args.push_back (a.c_str ()); - - if (getenv ("IFCPATH")) - { - // VC's IFCPATH takes precedence over /module:stdIfcDir so unset it if - // we are using our own std modules. - // - if (!stdifc.empty ()) - env.push_back ("IFCPATH"); - } - else if (stdifc.empty ()) { - // Add the VC's default directory (should be only one). - // - if (sys_mod_dirs != nullptr && !sys_mod_dirs->empty ()) - { - args.push_back ("/module:stdIfcDir"); - args.push_back (sys_mod_dirs->front ().string ().c_str ()); - } + if (ctype == compiler_type::msvc) + args.push_back ("/reference"); + + args.push_back (a.c_str ()); } } target_state compile_rule:: - perform_update (action a, const target& xt) const + perform_update (action a, const target& xt, match_data& md) const { const file& t (xt.as<file> ()); const path& tp (t.path ()); - match_data md (move (t.data<match_data> ())); unit_type ut (md.type); context& ctx (t.ctx); @@ -6396,9 +7279,6 @@ namespace build2 }, md.modules.copied)); // See search_modules() for details. - const file& s (pr.second); - const path* sp (&s.path ()); - // Force recompilation in case of a deferred failure even if nothing // changed. // @@ -6415,11 +7295,14 @@ namespace build2 return *pr.first; } + const file& s (pr.second); + const path* sp (&s.path ()); + // Make sure depdb is no older than any of our prerequisites (see md.mt // logic description above for details). Also save the sequence start // time if doing mtime checks (see the depdb::check_mtime() call below). // - timestamp start (depdb::mtime_check () + timestamp start (!ctx.dry_run && depdb::mtime_check () ? system_clock::now () : timestamp_unknown); @@ -6437,7 +7320,8 @@ namespace build2 // If we are building a module interface or partition, then the target // is bmi*{} and it may have an ad hoc obj*{} member. For header units // there is no obj*{} (see the corresponding add_adhoc_member() call in - // apply()). + // apply()). 
For named modules there may be no obj*{} if this is a + // sidebuild (obj*{} is already in the library binary). // path relm; path relo; @@ -6485,9 +7369,6 @@ namespace build2 small_vector<string, 2> header_args; // Header unit options storage. small_vector<string, 2> module_args; // Module options storage. - size_t out_i (0); // Index of the -o option. - size_t lang_n (0); // Number of lang options. - switch (cclass) { case compiler_class::msvc: @@ -6505,7 +7386,44 @@ namespace build2 append_options (args, cmode); if (md.pp != preprocessed::all) - append_sys_inc_options (args); // Extra system header dirs (last). + append_sys_hdr_options (args); // Extra system header dirs (last). + + // Note: could be overridden in mode. + // + append_diag_color_options (args); + + // Set source/execution charsets to UTF-8 unless a custom charset + // is specified. + // + // Note that clang-cl supports /utf-8 and /*-charset. + // + { + bool sc (find_option_prefixes ( + {"/source-charset:", "-source-charset:"}, args)); + bool ec (find_option_prefixes ( + {"/execution-charset:", "-execution-charset:"}, args)); + + if (!sc && !ec) + args.push_back ("/utf-8"); + else + { + if (!sc) + args.push_back ("/source-charset:UTF-8"); + + if (!ec) + args.push_back ("/execution-charset:UTF-8"); + } + } + + // If we have any /external:I options but no /external:Wn, then add + // /external:W0 to emulate the -isystem semantics. + // + if (cvariant != "clang" && isystem (*this)) + { + if (find_option_prefixes ({"/external:I", "-external:I"}, args) && + !find_option_prefixes ({"/external:W", "-external:W"}, args)) + args.push_back ("/external:W0"); + } // While we want to keep the low-level build as "pure" as possible, // the two misguided defaults, C++ exceptions and runtime, just have @@ -6517,7 +7435,9 @@ namespace build2 // For C looks like no /EH* (exceptions supported but no C++ objects // destroyed) is a reasonable default. 
// - if (x_lang == lang::cxx && !find_option_prefix ("/EH", args)) + + if (x_lang == lang::cxx && + !find_option_prefixes ({"/EH", "-EH"}, args)) args.push_back ("/EHsc"); // The runtime is a bit more interesting. At first it may seem like @@ -6539,7 +7459,7 @@ namespace build2 // unreasonable thing to do). So by default we will always use the // release runtime. // - if (!find_option_prefixes ({"/MD", "/MT"}, args)) + if (!find_option_prefixes ({"/MD", "/MT", "-MD", "-MT"}, args)) args.push_back ("/MD"); msvc_sanitize_cl (args); @@ -6562,9 +7482,8 @@ namespace build2 // Note also that what we are doing here appears to be incompatible // with PCH (/Y* options) and /Gm (minimal rebuild). // - // @@ MOD: TODO deal with absent relo. - // - if (find_options ({"/Zi", "/ZI"}, args)) + if (!relo.empty () && + find_options ({"/Zi", "/ZI", "-Zi", "-ZI"}, args)) { if (fc) args.push_back ("/Fd:"); @@ -6577,27 +7496,38 @@ namespace build2 args.push_back (out1.c_str ()); } - if (fc) - { - args.push_back ("/Fo:"); - args.push_back (relo.string ().c_str ()); - } - else + if (ut == unit_type::module_intf || + ut == unit_type::module_intf_part || + ut == unit_type::module_impl_part || + ut == unit_type::module_header) { - out = "/Fo" + relo.string (); - args.push_back (out.c_str ()); - } + assert (ut != unit_type::module_header); // @@ MODHDR - // @@ MODHDR MSVC - // @@ MODPART MSVC - // - if (ut == unit_type::module_intf) - { relm = relative (tp); - args.push_back ("/module:interface"); - args.push_back ("/module:output"); + args.push_back ("/ifcOutput"); args.push_back (relm.string ().c_str ()); + + if (relo.empty ()) + args.push_back ("/ifcOnly"); + else + { + args.push_back ("/Fo:"); + args.push_back (relo.string ().c_str ()); + } + } + else + { + if (fc) + { + args.push_back ("/Fo:"); + args.push_back (relo.string ().c_str ()); + } + else + { + out = "/Fo" + relo.string (); + args.push_back (out.c_str ()); + } } // Note: no way to indicate that the source if already 
preprocessed. @@ -6610,6 +7540,65 @@ namespace build2 } case compiler_class::gcc: { + append_options (args, cmode); + + // Clang 15 introduced the unqualified-std-cast-call warning which + // warns about unqualified calls to std::move() and std::forward() + // (because they can be "hijacked" via ADL). Surprisingly, this + // warning is enabled by default, as opposed to with -Wextra or at + // least -Wall. It has also proven to be quite disruptive, causing a + // large number of warnings in a large number of packages. So we are + // going to "remap" it to -Wextra for now and in the future may + // "relax" it to -Wall and potentially to being enabled by default. + // See GitHub issue #259 for background and details. + // + if (x_lang == lang::cxx && + ctype == compiler_type::clang && + cmaj >= 15) + { + bool w (false); // Seen -W[no-]unqualified-std-cast-call + optional<bool> extra; // Seen -W[no-]extra + + for (const char* s: reverse_iterate (args)) + { + if (s != nullptr) + { + if (strcmp (s, "-Wunqualified-std-cast-call") == 0 || + strcmp (s, "-Wno-unqualified-std-cast-call") == 0) + { + w = true; + break; + } + + if (!extra) // Last seen option wins. + { + if (strcmp (s, "-Wextra") == 0) extra = true; + else if (strcmp (s, "-Wno-extra") == 0) extra = false; + } + } + } + + if (!w && (!extra || !*extra)) + args.push_back ("-Wno-unqualified-std-cast-call"); + } + + if (md.pp != preprocessed::all) + append_sys_hdr_options (args); // Extra system header dirs (last). + + // Note: could be overridden in mode. + // + append_diag_color_options (args); + + // Set the input charset to UTF-8 unless a custom one is specified. + // + // Note that the execution charset (-fexec-charset) is UTF-8 by + // default. + // + // Note that early versions of Clang only recognize uppercase UTF-8. + // + if (!find_option_prefix ("-finput-charset=", args)) + args.push_back ("-finput-charset=UTF-8"); + if (ot == otype::s) { // On Darwin, Win32 -fPIC is the default. 
@@ -6653,8 +7642,7 @@ namespace build2 // either -nostdlib or -nostartfiles is specified. Let's do // the same. // - initializer_list<const char*> os {"-nostdlib", "-nostartfiles"}; - if (!find_options (os, cmode) && !find_options (os, args)) + if (!find_options ({"-nostdlib", "-nostartfiles"}, args)) { args.push_back ("-D_MT"); args.push_back ("-D_DLL"); @@ -6713,18 +7701,9 @@ namespace build2 } } - append_options (args, cmode); - - if (md.pp != preprocessed::all) - append_sys_inc_options (args); // Extra system header dirs (last). - append_header_options (env, args, header_args, a, t, md, md.dd); append_module_options (env, args, module_args, a, t, md, md.dd); - // Note: the order of the following options is relied upon below. - // - out_i = args.size (); // Index of the -o option. - if (ut == unit_type::module_intf || ut == unit_type::module_intf_part || ut == unit_type::module_impl_part || @@ -6763,21 +7742,35 @@ namespace build2 } case compiler_type::clang: { - // @@ MOD TODO: deal with absent relo. + assert (ut != unit_type::module_header); // @@ MODHDR relm = relative (tp); - args.push_back ("-o"); - args.push_back (relm.string ().c_str ()); - args.push_back ("--precompile"); - // Without this option Clang's .pcm will reference source - // files. In our case this file may be transient (.ii). Plus, + // files. In our case this file may be transient (.ii). Plus, // it won't play nice with distributed compilation. // + // Note that this sort of appears to be the default from Clang + // 17, but not quite, see llvm-project issue #72383.
+ // args.push_back ("-Xclang"); args.push_back ("-fmodules-embed-all-files"); + if (relo.empty ()) + { + args.push_back ("-o"); + args.push_back (relm.string ().c_str ()); + args.push_back ("--precompile"); + } + else + { + out1 = "-fmodule-output=" + relm.string (); + args.push_back (out1.c_str ()); + args.push_back ("-o"); + args.push_back (relo.string ().c_str ()); + args.push_back ("-c"); + } + break; } case compiler_type::msvc: @@ -6792,7 +7785,7 @@ namespace build2 args.push_back ("-c"); } - lang_n = append_lang_options (args, md); + append_lang_options (args, md); if (md.pp == preprocessed::all) { @@ -6837,25 +7830,46 @@ namespace build2 if (!env.empty ()) env.push_back (nullptr); + // We have no choice but to serialize early if we want the command line + // printed shortly before actually executing the compiler. Failing that, + // it may look like we are still executing in parallel. + // + scheduler::alloc_guard jobs_ag; + if (!ctx.dry_run && cast_false<bool> (t[c_serialize])) + jobs_ag = scheduler::alloc_guard (*ctx.sched, phase_unlock (nullptr)); + // With verbosity level 2 print the command line as if we are compiling // the source file, not its preprocessed version (so that it's easy to // copy and re-run, etc). Only at level 3 and above print the real deal. // + // @@ TODO: why don't we print env (here and/or below)? Also link rule. + // if (verb == 1) - text << x_name << ' ' << s; + { + const char* name (x_assembler_cpp (s) ? "as-cpp" : + x_objective (s) ? x_obj_name : + x_name); + + print_diag (name, s, t); + } else if (verb == 2) print_process (args); // If we have the (partially) preprocessed output, switch to that. // - bool psrc (!md.psrc.path.empty ()); - bool pact (md.psrc.active); + // But we remember the original source/position to restore later. + // + bool psrc (md.psrc); // Note: false if cc.reprocess.
+ bool ptmp (psrc && md.psrc.temporary); + pair<size_t, const char*> osrc; if (psrc) { args.pop_back (); // nullptr + osrc.second = args.back (); args.pop_back (); // sp + osrc.first = args.size (); - sp = &md.psrc.path; + sp = &md.psrc.path (); // This should match with how we setup preprocessing. // @@ -6863,25 +7877,40 @@ namespace build2 { case compiler_type::gcc: { - // The -fpreprocessed is implied by .i/.ii. But not when compiling - // a header unit (there is no .hi/.hii). + // -fpreprocessed is implied by .i/.ii unless compiling a header + // unit (there is no .hi/.hii). Also, we would need to pop -x + // since it takes precedence over the extension, which would mess + // up our osrc logic. So in the end it feels like always passing + // explicit -fpreprocessed is the way to go. // - if (ut == unit_type::module_header) - args.push_back ("-fpreprocessed"); - else - // Pop -x since it takes precedence over the extension. - // - // @@ I wonder why bother and not just add -fpreprocessed? Are - // we trying to save an option or does something break? - // - for (; lang_n != 0; --lang_n) - args.pop_back (); - + // Also note that similarly there is no .Si for .S files. + // + args.push_back ("-fpreprocessed"); args.push_back ("-fdirectives-only"); break; } case compiler_type::clang: { + // Clang 15 and later with -pedantic warns about GNU-style line + // markers that it wrote itself in the -frewrite-includes output + // (llvm-project issue 63284). So we suppress this warning unless + // compiling from source. + // + // In Apple Clang this warning/option are absent in 14.0.3 (which + // is said to be based on vanilla Clang 15.0.5) for some reason + // (let's hope it's because they patched it out rather than due to + // a misleading _LIBCPP_VERSION value). + // + if (ctype == compiler_type::clang && + cmaj >= (cvariant != "apple" ? 
15 : 16)) + { + if (find_options ({"-pedantic", "-pedantic-errors", + "-Wpedantic", "-Werror=pedantic"}, args)) + { + args.push_back ("-Wno-gnu-line-marker"); + } + } + // Note that without -x Clang will treat .i/.ii as fully // preprocessed. // @@ -6902,10 +7931,11 @@ namespace build2 // Let's keep the preprocessed file in case of an error but only at // verbosity level 3 and up (when one actually sees it mentioned on - // the command line). We also have to re-arm on success (see below). + // the command line). We also have to re-enable on success (see + // below). // - if (pact && verb >= 3) - md.psrc.active = false; + if (ptmp && verb >= 3) + md.psrc.temporary = false; } if (verb >= 3) @@ -6924,46 +7954,43 @@ namespace build2 { try { + // If we are compiling the preprocessed output, get its read handle. + // + file_cache::read psrcr (psrc ? md.psrc.open () : file_cache::read ()); + // VC cl.exe sends diagnostics to stdout. It also prints the file - // name being compiled as the first line. So for cl.exe we redirect - // stdout to a pipe, filter that noise out, and send the rest to - // stderr. + // name being compiled as the first line. So for cl.exe we filter + // that noise out. // - // For other compilers redirect stdout to stderr, in case any of - // them tries to pull off something similar. For sane compilers this - // should be harmless. + // For other compilers also redirect stdout to stderr, in case any + // of them tries to pull off something similar. For sane compilers + // this should be harmless. // bool filter (ctype == compiler_type::msvc); process pr (cpath, - args.data (), - 0, (filter ? -1 : 2), 2, + args, + 0, 2, diag_buffer::pipe (ctx, filter /* force */), nullptr, // CWD env.empty () ? 
nullptr : env.data ()); - if (filter) - { - try - { - ifdstream is ( - move (pr.in_ofd), fdstream_mode::text, ifdstream::badbit); + diag_buffer dbuf (ctx, args[0], pr); - msvc_filter_cl (is, *sp); + if (filter) + msvc_filter_cl (dbuf, *sp); - // If anything remains in the stream, send it all to stderr. - // Note that the eof check is important: if the stream is at - // eof, this and all subsequent writes to the diagnostics stream - // will fail (and you won't see a thing). - // - if (is.peek () != ifdstream::traits_type::eof ()) - diag_stream_lock () << is.rdbuf (); + dbuf.read (); - is.close (); - } - catch (const io_error&) {} // Assume exits with error. + // Restore the original source if we switched to preprocessed. + // + if (psrc) + { + args.resize (osrc.first); + args.push_back (osrc.second); + args.push_back (nullptr); } - run_finish (args, pr); + run_finish (dbuf, args, pr, 1 /* verbosity */); } catch (const process_error& e) { @@ -6975,65 +8002,16 @@ namespace build2 throw failed (); } + jobs_ag.deallocate (); + if (md.deferred_failure) fail << "expected error exit status from " << x_lang << " compiler"; } // Remove preprocessed file (see above). // - if (pact && verb >= 3) - md.psrc.active = true; - - // Clang's module compilation requires two separate compiler - // invocations. - // - // @@ MODPART: Clang (all of this is probably outdated). - // - if (ctype == compiler_type::clang && ut == unit_type::module_intf) - { - // Adjust the command line. First discard everything after -o then - // build the new "tail". - // - args.resize (out_i + 1); - args.push_back (relo.string ().c_str ()); // Produce .o. - args.push_back ("-c"); // By compiling .pcm. - args.push_back ("-Wno-unused-command-line-argument"); - args.push_back (relm.string ().c_str ()); - args.push_back (nullptr); - - if (verb >= 2) - print_process (args); - - if (!ctx.dry_run) - { - // Remove the target file if this fails. 
If we don't do that, we - // will end up with a broken build that is up-to-date. - // - auto_rmfile rm (relm); - - try - { - process pr (cpath, - args.data (), - 0, 2, 2, - nullptr, // CWD - env.empty () ? nullptr : env.data ()); - - run_finish (args, pr); - } - catch (const process_error& e) - { - error << "unable to execute " << args[0] << ": " << e; - - if (e.child) - exit (1); - - throw failed (); - } - - rm.cancel (); - } - } + if (ptmp && verb >= 3) + md.psrc.temporary = true; timestamp now (system_clock::now ()); @@ -7050,18 +8028,27 @@ namespace build2 } target_state compile_rule:: - perform_clean (action a, const target& xt) const + perform_clean (action a, const target& xt, const target_type& srct) const { const file& t (xt.as<file> ()); - clean_extras extras; + // Preprocessed file extension. + // + const char* pext (x_assembler_cpp (srct) ? ".Si" : + x_objective (srct) ? x_obj_pext : + x_pext); + // Compressed preprocessed file extension. + // + string cpext (t.ctx.fcache->compressed_extension (pext)); + + clean_extras extras; switch (ctype) { - case compiler_type::gcc: extras = {".d", x_pext, ".t"}; break; - case compiler_type::clang: extras = {".d", x_pext}; break; - case compiler_type::msvc: extras = {".d", x_pext, ".idb", ".pdb"};break; - case compiler_type::icc: extras = {".d"}; break; + case compiler_type::gcc: extras = {".d", pext, cpext.c_str (), ".t"}; break; + case compiler_type::clang: extras = {".d", pext, cpext.c_str ()}; break; + case compiler_type::msvc: extras = {".d", pext, cpext.c_str (), ".idb", ".pdb"}; break; + case compiler_type::icc: extras = {".d"}; break; } return perform_clean_extra (a, t, extras); |