From d3e40f931e65fcd86801630edd694e1a9a210ac4 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Fri, 16 Jun 2017 13:20:00 +0200 Subject: Store module information in depdb --- build2/cc/compile.cxx | 315 ++++++++++++++++++++++++++++++++++++-------------- build2/cc/compile.hxx | 14 ++- build2/cc/parser.cxx | 10 +- build2/cc/types.hxx | 13 ++- 4 files changed, 254 insertions(+), 98 deletions(-) (limited to 'build2') diff --git a/build2/cc/compile.cxx b/build2/cc/compile.cxx index a3610fb..a7ad2fa 100644 --- a/build2/cc/compile.cxx +++ b/build2/cc/compile.cxx @@ -29,6 +29,81 @@ namespace build2 { using namespace bin; + // module_info string serialization. + // + // The string representation is a space-separated list of module names + // with the following rules: + // + // 1. If this is a module unit, then the first name is the + // module name itself followed by either '!' for an interface unit or + // by '+' for an implementation unit. + // + // 2. If an imported module is re-exported, then the module name is + // followed by '*'. + // + // For example: + // + // foo! foo.core* foo.base* foo.impl + // foo.base+ foo.impl + // foo.base foo.impl + // + static string + to_string (const module_info& m) + { + string s; + + if (!m.name.empty ()) + { + s += m.name; + s += m.iface ? '!' : '+'; + } + + for (const module_import& i: m.imports) + { + if (!s.empty ()) + s += ' '; + + s += i.name; + + if (i.exported) + s += '*'; + } + + return s; + } + + static module_info + to_module_info (const string& s) + { + module_info m; + + for (size_t b (0), e (0), n; (n = next_word (s, b, e, ' ')) != 0; ) + { + char c (s[e - 1]); + switch (c) + { + case '!': + case '+': + case '*': break; + default: c = '\0'; + } + + string w (s, b, n - (c == '\0' ? 0 : 1)); + + if (c == '!' 
|| c == '+') + { + m.name = move (w); + m.iface = (c == '!'); + } + else + m.imports.push_back (module_import {move (w), c == '*', 0}); + } + + return m; + } + + // preprocessed + // template <typename T> inline bool operator< (preprocessed l, T r) // Template because of VC14 bug. @@ -62,7 +137,7 @@ namespace build2 compile:: compile (data&& d) : common (move (d)), - rule_id (string (x) += ".compile 1") + rule_id (string (x) += ".compile 3") { static_assert (sizeof (compile::match_data) <= target::data_size, "insufficient space"); @@ -608,7 +683,8 @@ namespace build2 // sha256 cs; - // This affects how we compile the source so factor it in. + // This affects how we compile the source as well as the format of + // depdb so factor it in. // cs.append (&md.pp, sizeof (md.pp)); @@ -695,30 +771,66 @@ namespace build2 if (md.pp < preprocessed::includes) p = extract_headers (act, t, lo, src, md, dd, u); + // Next we "obtain" the translation unit information. What exactly + // "obtain" entails is tricky: If things changed, then we re-parse the + // translation unit. Otherwise, we re-create this information from + // depdb. We, however, have to do it here and now in case the database + // is invalid and we still have to fall back to re-parse. + // + translation_unit tu; + for (bool f (true);; f = false) + { + if (u) + tu = parse_unit (act, t, lo, src, p.first, md); + + if (modules) + { + if (u) + { + string s (to_string (tu.mod)); + + if (f) + dd.expect (s); + else + dd.write (s); + } + else + { + if (string* l = dd.read ()) + tu.mod = to_module_info (*l); + else + { + // Database is invalid, re-parse. + // + u = true; + continue; + } + } + } + + break; + } + + // Extract the module dependency information in addition to header + // dependencies. + // + // NOTE: assumes that no further targets will be added into + // t.prerequisite_targets! + // + extract_modules (act, t, lo, tt, src, md, move (tu.mod), dd, u); + // If anything got updated, then we didn't rely on the cache. 
However, // the cached data could actually have been valid and the compiler run // in extract_headers() merely validated it. // - // We do need to update the database timestamp, however. Failed - // that, we will keep re-validating the cached data over and over - // again. + // We do need to update the database timestamp, however. Failing that, + // we will keep re-validating the cached data over and over again. // if (u && dd.reading ()) dd.touch (); dd.close (); - // Extract the module dependency information in addition to header - // dependencies above. - // - // NOTE: assumes that no further targets will be added into - // t.prerequisite_targets! - // - if (u) // @@ TMP (depdb validation similar to extract_headers()). - { - extract_modules (act, t, lo, tt, src, p.first, md, dd, u); - } - // If the preprocessed output is suitable for compilation and is not // disabled, then pass it along. // @@ -1745,7 +1857,9 @@ namespace build2 // assert (skip_count == 0); - while (dd.more ()) + // We should always end with a blank line. + // + for (;;) { string* l (dd.read ()); @@ -1757,6 +1871,9 @@ namespace build2 break; } + if (l->empty ()) // Done, nothing changed, no preprocessed output. + return make_pair (auto_rmfile (), false); + restart = add (path (move (*l)), true); skip_count++; @@ -2089,39 +2206,33 @@ namespace build2 } } + // Add the terminating blank line (we are updating depdb). + // + dd.expect (""); + puse = puse && !psrc.path ().empty (); return make_pair (move (psrc), puse); } - // Extract and inject module dependencies. 
- // - void compile:: - extract_modules (action act, - file& t, - lorder lo, - const compile_target_types& tt, - const file& src, - auto_rmfile& psrc, - match_data& md, - depdb& /*dd*/, - bool& /*updating*/) const + translation_unit compile:: + parse_unit (action act, + file& t, + lorder lo, + const file& src, + auto_rmfile& psrc, + const match_data& md) const { - tracer trace (x, "compile::extract_modules"); - - l5 ([&]{trace << "target: " << t;}); + tracer trace (x, "compile::parse_unit"); - // If things go wrong (and they often do in this area), give the user a - // bit extra context. + // If things go wrong, give the user a bit extra context. // auto df = make_diag_frame ( [&src](const diag_record& dr) { if (verb != 0) - dr << info << "while extracting module dependencies from " << src; + dr << info << "while parsing " << src; }); - const scope& bs (t.base_scope ()); - // For some compilers (GCC, Clang) the preporcessed output is only // partially preprocessed. For others (VC), it is already fully // preprocessed (well, almost: it still has comments but we can handle @@ -2150,7 +2261,7 @@ namespace build2 // args.push_back (cpath.recall_string ()); - append_lib_options (bs, args, t, act, lo); + append_lib_options (t.base_scope (), args, t, act, lo); append_options (args, t, c_poptions); append_options (args, t, x_poptions); @@ -2221,8 +2332,6 @@ namespace build2 // Preprocess and parse. // - translation_unit tu; - for (;;) // Breakout loop. try { @@ -2256,7 +2365,7 @@ namespace build2 fdstream_mode::text | fdstream_mode::skip); parser p; - tu = p.parse (is, rels); + translation_unit tu (p.parse (is, rels)); is.close (); @@ -2265,7 +2374,14 @@ namespace build2 if (ps) psrc = auto_rmfile (move (rels)); // Re-arm. - break; + // VC15 is not (yet) using the 'export module' syntax so use the + // prerequisite type to distinguish between interface and + // implementation units. 
+ // + if (cid == compiler_id::msvc && src.is_a (*x_mod)) + tu.mod.iface = true; + + return tu; } // Fall through. @@ -2282,14 +2398,16 @@ namespace build2 const process_exit& e (*pr.exit); // What should we do with a normal error exit? Remember we suppressed - // the compiler's diagnostics. Let's issue a warning and continue with - // the assumption that the compilation step fails with diagnostics. + // the compiler's diagnostics. We used to issue a warning and continue + // with the assumption that the compilation step will fail with + // diagnostics. The problem with this approach is that we may fail + // before that because the information we return (e.g., module name) + // is bogus. So looks like failing is the only option. // if (e.normal ()) { - warn << "unable to extract module dependency information from " - << src; - return; + fail << "unable to preprocess " << src << + info << "re-run with -s -V to display the failing command"; } else fail << args[0] << " terminated abnormally: " << e.description (); @@ -2300,75 +2418,98 @@ namespace build2 if (e.child) exit (1); - - throw failed (); } - // Sanity checks. + throw failed (); + } + + // Extract and inject module dependencies. + // + void compile:: + extract_modules (action act, + file& t, + lorder lo, + const compile_target_types& tt, + const file& src, + match_data& md, + module_info&& mi, + depdb& dd, + bool& updating) const + { + tracer trace (x, "compile::extract_modules"); + l5 ([&]{trace << "target: " << t;}); + + // If things go wrong, give the user a bit extra context. // - if (modules) - { - // If we are compiling a module interface unit, make sure it has the - // necessary declarations. - // - if (src.is_a (*x_mod)) + auto df = make_diag_frame ( + [&src](const diag_record& dr) { - // VC is not (yet) using the 'export module' syntax so use the - // preprequisite type to distinguish between interface and - // implementation units. 
- // - if (cid == compiler_id::msvc) - tu.module_interface = true; + if (verb != 0) + dr << info << "while extracting module dependencies from " << src; + }); - if (tu.module_name.empty () || !tu.module_interface) - fail << src << " is not a module interface unit"; - } - } + if (!modules) + { + if (!mi.name.empty () || !mi.imports.empty ()) + fail << "modules support not enabled or unavailable"; - if (tu.module_name.empty () && tu.module_imports.empty ()) return; + } - // Modules are used by this translation unit. Make sure module support - // is enabled. + // Sanity checks. // - if (!modules) - fail << "modules support not enabled or unavailable"; + // If we are compiling a module interface unit, make sure it has the + // necessary declarations. + // + if (src.is_a (*x_mod) && (mi.name.empty () || !mi.iface)) + fail << src << " is not a module interface unit"; // Search and match all the modules we depend on. If this is a module // implementation unit, then treat the module itself as if it was - // imported. + // imported. Note: move. + // + if (!mi.iface && !mi.name.empty ()) + mi.imports.push_back (module_import {move (mi.name), false, 0}); + + // The change to the set of imports would have required a change to + // source code (or options). Changes to the bmi{}s themselves will be + // detected via the normal prerequisite machinery. However, the same set + // of imports could be resolved to a different set of bmi{}s (in a sense + // similar to changing the source file). To detect this we calculate and + // store a hash of all (not just direct) bmi{}'s paths. 
// - if (!tu.module_interface && !tu.module_name.empty ()) - tu.module_imports.push_back ( - module_import {move (tu.module_name), false, 0}); + sha256 cs; - if (!tu.module_imports.empty ()) + if (!mi.imports.empty ()) md.mod_pos = search_modules ( - bs, act, t, lo, tt.bmi, move (tu.module_imports)); + act, t, lo, tt.bmi, move (mi.imports), cs); + + if (dd.expect (cs.string ()) != nullptr) + updating = true; // Set the cc.module_name variable if this is an interface unit. Note // that it may seem like a good idea to set it on the bmi{} group to // avoid duplication. We, however, cannot do it MT-safely since we don't // match the group. // - if (tu.module_interface) + if (mi.iface) { if (value& v = t.vars.assign (c_module_name)) - assert (cast<string> (v) == tu.module_name); + assert (cast<string> (v) == mi.name); else - v = move (tu.module_name); // Note: move. + v = move (mi.name); // Note: move. } } // Resolve imported modules to bmi*{} targets. // modules_positions compile:: - search_modules (const scope& /*bs*/, //@@ MOD: no need? - action act, + search_modules (action act, file& t, lorder lo, const target_type& mtt, - module_imports&& imports) const + module_imports&& imports, + sha256& cs) const { // So we have a list of imports and a list of "potential" module // prerequisites. They are potential in the sense that they may or may @@ -2691,7 +2832,8 @@ namespace build2 // match_members (act, t, pts, start); - // Post-process the list of our (direct) imports. + // Post-process the list of our (direct) imports. While at it, calculate + // the checksum of all (direct and indirect) bmi{} paths. // size_t ex_start (n); size_t ex_tail (pts.size ()); @@ -2731,6 +2873,10 @@ namespace build2 if (m.exported && ex_start == n) ex_start = i; + // Hash (we know it's a file). + // + cs.append (static_cast<const file&> (bt).path ().string ()); + // Copy over re-exported bmi{} from our prerequisites weeding out // duplicates. 
// @@ -2751,6 +2897,7 @@ namespace build2 }) == imports.end ()) { pts.push_back (et); + cs.append (static_cast<const file&> (*et).path ().string ()); // Add to the list of imports for further duplicate suppression. // We could have probably stored reference to the name (e.g., in diff --git a/build2/cc/compile.hxx b/build2/cc/compile.hxx index c98e407..d1b14bc 100644 --- a/build2/cc/compile.hxx +++ b/build2/cc/compile.hxx @@ -101,15 +101,19 @@ namespace build2 extract_headers (action, file&, lorder, const file&, const match_data&, depdb&, bool&) const; + + translation_unit + parse_unit (action, file&, lorder, + const file&, auto_rmfile&, const match_data&) const; + void extract_modules (action, file&, lorder, const compile_target_types&, - const file&, auto_rmfile&, match_data&, - depdb&, bool&) const; + const file&, match_data&, + module_info&&, depdb&, bool&) const; modules_positions - search_modules (const scope&, - action, file&, lorder, const target_type&, - module_imports&&) const; + search_modules (action, file&, lorder, const target_type&, + module_imports&&, sha256&) const; void append_modules (cstrings&, strings&, const file&) const; diff --git a/build2/cc/parser.cxx b/build2/cc/parser.cxx index 3b7951d..aae49dc 100644 --- a/build2/cc/parser.cxx +++ b/build2/cc/parser.cxx @@ -21,7 +21,7 @@ namespace build2 lexer l (is, name); l_ = &l; - translation_unit u {"", false, {}}; + translation_unit u {{"", false, {}}}; u_ = &u; // If the source has errors then we want the compiler to issues the @@ -146,7 +146,7 @@ namespace build2 // Ignore duplicates. We don't expect a large numbers of imports so // vector/linear search is probably more efficient than a set. 
// - auto& is (u_->module_imports); + auto& is (u_->mod.imports); auto i (find_if (is.begin (), is.end (), [&n] (const module_import& i) @@ -176,11 +176,11 @@ namespace build2 if (t.type != type::semi) fail (t) << "';' expected instead of " << t; - if (!u_->module_name.empty ()) + if (!u_->mod.name.empty ()) fail (t) << "multiple module declarations"; - u_->module_name = move (n); - u_->module_interface = ex; + u_->mod.name = move (n); + u_->mod.iface = ex; } string parser:: diff --git a/build2/cc/types.hxx b/build2/cc/types.hxx index 17f6b96..3448827 100644 --- a/build2/cc/types.hxx +++ b/build2/cc/types.hxx @@ -14,7 +14,7 @@ namespace build2 { namespace cc { - // Translation unit information (currently modules). + // Translation unit information. // struct module_import { @@ -25,11 +25,16 @@ namespace build2 using module_imports = vector<module_import>; + struct module_info + { + string name; // Not empty if a module unit. + bool iface; // True if a module interface unit. + module_imports imports; // Imported modules. + }; + struct translation_unit { - string module_name; // Not empty if a module unit. - bool module_interface; // True if a module interface unit. - cc::module_imports module_imports; // Imported modules. + module_info mod; }; // Compiler language. -- cgit v1.1