From 0a52676ff3de5b302eb4fa85ed8440ae14281318 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 30 Jun 2021 13:01:16 +0200 Subject: Move symbol exporting .def file rule to bin.def module, add support for MinGW The bin.def module is automatically loaded by the c and cxx modules for the *-win32-msvc target architecture. This allows automatically exporting all symbols for all Windows targets using the following setup (showing for cxx in this example): lib{foo}: libul{foo}: {hxx cxx}{**} ... lib{foo}: def{foo}: include = ($cxx.target.system == 'win32-msvc') def{foo}: libul{foo} if ($cxx.target.system == 'mingw32') cxx.loptions += -Wl,--export-all-symbols That is, we use the .def file generation for MSVC and the built-in support (--export-all-symbols) for MinGW. But it is also possible to use the .def file generation for MinGW. In this case we need to explicitly load the bin.def module (which should be done after loading c or cxx) and use the following setup: using bin.def # In root.build. lib{foo}: libul{foo}: {hxx cxx}{**} ... 
lib{foo}: def{foo}: include = ($cxx.target.class == 'windows') def{foo}: libul{foo} --- libbuild2/bin/def-rule.cxx | 381 +++++++++++++++++++++++++++++---------------- libbuild2/bin/init.cxx | 50 ++++-- libbuild2/bin/init.hxx | 6 + libbuild2/cc/init.cxx | 6 +- 4 files changed, 288 insertions(+), 155 deletions(-) (limited to 'libbuild2') diff --git a/libbuild2/bin/def-rule.cxx b/libbuild2/bin/def-rule.cxx index 3805f3b..49d0ac0 100644 --- a/libbuild2/bin/def-rule.cxx +++ b/libbuild2/bin/def-rule.cxx @@ -25,13 +25,13 @@ namespace build2 }; static void - parse_dumpbin (istream& is, symbols& syms) + read_dumpbin (istream& is, symbols& syms) { // Lines that describe symbols look like: // // 0 1 2 3 4 5 6 // IDX OFFSET SECT SYMTYPE VISIBILITY SYMNAME - // ------------------------------------------------------------------------ + // ---------------------------------------------------------------------- // 02E 00000130 SECTA notype External | _standbyState // 02F 00000009 SECT9 notype Static | _LocalRecoveryInProgress // 064 00000020 SECTC notype () Static | _XLogCheckBuffer @@ -175,7 +175,7 @@ namespace build2 } static void - parse_llvm_nm (istream& is, symbols& syms) + read_posix_nm (istream& is, symbols& syms) { // Lines that describe symbols look like: // @@ -214,6 +214,202 @@ namespace build2 } } + static void + write_win32_msvc (ostream& os, const symbols& syms, bool i386) + { + // Our goal here is to export the same types of symbols as what gets + // exported by MSVC with __declspec(dllexport) (can be viewed with + // dumpbin /EXPORTS). 
+ // + // Some special C++ symbol patterns: + // + // Data symbols: + // + // ??_C* -- string literal (R, not exported) + // ??_7* -- vtable (R, exported) + // ??_R* -- rtti, can be prefixed with _CT/__CT (D/R, not exported) + // + // Text symbols: + // + // ??_G* -- scalar deleting destructor (not exported) + // ??_E* -- vector deleting destructor (not exported) + // + // The following two symbols seem to be related to exception + // throwing and most likely should not be exported. + // + // R _CTA3?AVinvalid_argument@std@@ + // R _TI3?AVinvalid_argument@std@@ + // + // There are also what appear to be floating point literals: + // + // R __real@3f80000 + // + // For some reason i386 object files have extern "C" symbols (both + // data and text) prefixed with an underscore which must be stripped + // in the .def file. + // + // Note that the extra prefix seems to be also added to special + // symbols so something like _CT??... becomes __CT??... on i386. + // However, for such symbols the underscore shall not be removed. + // Which means an extern "C" _CT becomes __CT on i386 and is hard to + // distinguish from the special symbols. We deal with this by only + // stripping the underscore if the symbol doesn't contain any + // special characters (?@). + // + auto extern_c = [] (const string& s) + { + return s.find_first_of ("?@") == string::npos; + }; + + auto strip = [i386, &extern_c] (const string& s) -> const char* + { + const char* r (s.c_str ()); + + if (i386 && s[0] == '_' && extern_c (s)) + r++; + + return r; + }; + + // Code. + // + for (const string& s: syms.t) + { + auto filter = [&strip] (const string& s) -> const char* + { + if (s.compare (0, 4, "??_G") == 0 || + s.compare (0, 4, "??_E") == 0) + return nullptr; + + return strip (s); + }; + + if (const char* v = filter (s)) + os << " " << v << '\n'; + } + + // Data. + // + // Note that it's not easy to import data without a dllimport + // declaration. 
+ // + { + auto filter = [&strip] (const string& s) -> const char* + { + if (s.compare (0, 4, "??_R") == 0 || + s.compare (0, 4, "??_C") == 0) + return nullptr; + + return strip (s); + }; + + for (const string& s: syms.d) + if (const char* v = filter (s)) + os << " " << v << " DATA\n"; + + for (const string& s: syms.b) + if (const char* v = filter (s)) + os << " " << v << " DATA\n"; + + // Read-only data contains an especially large number of various + // special symbols. Instead of trying to filter them out case by case, + // we will try to recognize C/C++ identifiers plus the special symbols + // that we need to export (e.g., vtable). + // + // + for (const string& s: syms.r) + { + if (extern_c (s) || // C + (s[0] == '?' && s[1] != '?') || // C++ + s.compare (0, 4, "??_7") == 0) // vtable + { + os << " " << strip (s) << " DATA\n"; + } + } + } + } + + static void + write_mingw32 (ostream& os, const symbols& syms, bool i386) + { + // Our goal here is to export the same types of symbols as what gets + // exported by GCC with __declspec(dllexport) (can be viewed with + // dumpbin /EXPORTS). + // + // Some special C++ symbol patterns (Itanium C++ ABI): + // + // Data symbols: + // + // _ZTVN* -- vtable (R, exported) + // _ZTIN* -- typeinfo (R, exported) + // _ZTSN* -- typeinfo name (R, not exported) + // + // There are also some special R symbols which start with .refptr. + // that are not exported. + // + // Normal symbols (both text and data) appear to start with _ZN. + // + // Note that we have the same extra underscore for i386 as in the + // win32-msvc case above but here even for mangled symbols (e.g., __Z*). + // + auto skip = [i386] (const string& s) -> size_t + { + return i386 && s[0] == '_' ? 1 : 0; + }; + + // Code. + // + for (const string& s: syms.t) + { + auto filter = [&skip] (const string& s) -> const char* + { + return s.c_str () + skip (s); + }; + + if (const char* v = filter (s)) + os << " " << v << '\n'; + } + + // Data. 
+ // + { + auto filter = [&skip] (const string& s) -> const char* + { + return s.c_str () + skip (s); + }; + + for (const string& s: syms.d) + if (const char* v = filter (s)) + os << " " << v << " DATA\n"; + + for (const string& s: syms.b) + if (const char* v = filter (s)) + os << " " << v << " DATA\n"; + + // Read-only data contains an especially large number of various + // special symbols. Instead of trying to filter them out case by case, + // we will try to recognize C/C++ identifiers plus the special symbols + // that we need to export (e.g., vtable and typeinfo). + // + for (const string& s: syms.r) + { + if (s.find_first_of (".") != string::npos) // Special (.refptr.*) + continue; + + size_t p (skip (s)), n (s.size () - p); + + if ((n < 2 || s[p] != '_' || s[p + 1] != 'Z') || // C + (s[p + 2] == 'N' ) || // C++ (normal) + (s[p + 2] == 'T' && (s[p + 3] == 'V' || // vtable + s[p + 3] == 'I') && // typeinfo + s[p + 4] == 'N')) + { + os << " " << s.c_str () + p << " DATA\n"; + } + } + } + } + bool def_rule:: match (action a, target& t, const string&) const { @@ -297,10 +493,12 @@ namespace build2 const scope& bs (t.base_scope ()); const scope& rs (*bs.root_scope ()); - // For link.exe we use its /DUMP option to access dunpbin.exe. For - // lld-link we use llvm-nm. + // For link.exe we use its /DUMP option to access dumpbin.exe. Otherwise + // (lld-link, MinGW), we use nm (llvm-nm, MinGW nm). For good measure + // (e.g., the bin.def module is loaded without bin.ld), we also handle + // the direct dumpbin.exe usage. // - const string& lid (cast (rs["bin.ld.id"])); + const string& lid (cast_empty (rs["bin.ld.id"])); // Update prerequisites and determine if anything changed. // @@ -320,9 +518,9 @@ namespace build2 // Then the nm checksum. // - if (dd.expect (lid == "msvc-lld" - ? cast (rs["bin.nm.checksum"]) - : cast (rs["bin.ld.checksum"])) != nullptr) + if (dd.expect (lid == "msvc" + ? 
cast (rs["bin.ld.checksum"]) + : cast (rs["bin.nm.checksum"])) != nullptr) l4 ([&]{trace << "linker mismatch forcing update of " << t;}); // @@ TODO: track in depdb if making symbol filtering configurable. @@ -381,28 +579,36 @@ namespace build2 if (!update) return *ts; - const process_path& nm (lid == "msvc-lld" - ? cast (rs["bin.nm.path"]) - : cast (rs["bin.ld.path"])); - - const string& cpu (cast (rs["bin.target.cpu"])); - bool i386 (cpu.size () == 4 && - cpu[0] == 'i' && cpu[2] == '8' && cpu[3] == '6'); + const process_path& nm (lid == "msvc" + ? cast (rs["bin.ld.path"]) + : cast (rs["bin.nm.path"])); cstrings args {nm.recall_string ()}; - if (lid == "msvc-lld") - { - args.push_back ("--no-weak"); - args.push_back ("--defined-only"); - args.push_back ("--format=posix"); - } - else + string nid; + if (lid == "msvc") { args.push_back ("/DUMP"); // Must come first. args.push_back ("/NOLOGO"); args.push_back ("/SYMBOLS"); } + else + { + nid = cast (rs["bin.nm.id"]); + + if (nid == "msvc") + { + args.push_back ("/NOLOGO"); + args.push_back ("/SYMBOLS"); + } + else + { + // Note that llvm-nm's --no-weak is only available since LLVM 7. + // + args.push_back ("--extern-only"); + args.push_back ("--format=posix"); + } + } args.push_back (nullptr); // Argument placeholder. args.push_back (nullptr); @@ -428,9 +634,9 @@ namespace build2 if (ctx.dry_run) continue; - // Both link.exe /DUMP and llvm-nm send their output to stdout. While - // llvm-nm sends diagnostics to stderr, link.exe sends it to stdout - // together with the output. + // Both dumpbin.exe and nm send their output to stdout. While nm sends + // diagnostics to stderr, dumpbin sends it to stdout together with the + // output. 
// process pr (run_start (nm, args, @@ -442,10 +648,10 @@ namespace build2 ifdstream is ( move (pr.in_ofd), fdstream_mode::skip, ifdstream::badbit); - if (lid == "msvc-lld") - parse_llvm_nm (is, syms); + if (lid == "msvc" || nid == "msvc") + read_dumpbin (is, syms); else - parse_dumpbin (is, syms); + read_posix_nm (is, syms); is.close (); } @@ -473,8 +679,12 @@ namespace build2 if (!ctx.dry_run) { - auto_rmfile rm (tp); + const auto& tgt (cast (rs["bin.target"])); + + bool i386 (tgt.cpu.size () == 4 && + tgt.cpu[0] == 'i' && tgt.cpu[2] == '8' && tgt.cpu[3] == '6'); + auto_rmfile rm (tp); try { ofdstream os (tp); @@ -482,113 +692,10 @@ namespace build2 os << "; Auto-generated, do not edit.\n" << "EXPORTS\n"; - // Our goal here is to export the same types of symbols as what gets - // exported with __declspec(dllexport) (which can be viewed with - // dumpbin /EXPORTS). - // - // Some special C++ symbol patterns: - // - // Data symbols: - // - // ??_C* -- string literal (R, not exported) - // ??_7* -- vtable (R, exported) - // ??_R* -- rtti, can be prefixed with _CT/__CT (D/R, not exported) - // - // Text symbols: - // - // ??_G* -- scalar deleting destructor (not exported) - // ??_E* -- vector deleting destructor (not exported) - // - // The following two symbols seem to be related to exception - // throwing and most likely should not be exported. - // - // R _CTA3?AVinvalid_argument@std@@ - // R _TI3?AVinvalid_argument@std@@ - // - // There are also what appears to be floating point literals: - // - // R __real@3f80000 - // - // For some reason i386 object files have extern "C" symbols (both - // data and text) prefixed with an underscore which must be stripped - // in the .def file. - // - // Note that the extra prefix seems to be also added to special - // symbols so something like _CT??... becomes __CT??... on i386. - // However, for such symbols the underscore shall not be removed. 
- // Which means an extern "C" _CT becomes __CT on i383 and hard to - // distinguish from the special symbols. We deal with this by only - // stripping the underscore if the symbols doesn't contain any - // special characters (?@). - // - auto extern_c = [] (const string& s) - { - return s.find_first_of ("?@") == string::npos; - }; - - auto strip = [i386, &extern_c] (const string& s) -> const char* - { - const char* r (s.c_str ()); - - if (i386 && s[0] == '_' && extern_c (s)) - r++; - - return r; - }; - - for (const string& s: syms.t) - { - auto filter = [&strip] (const string& s) -> const char* - { - if (s.compare (0, 4, "??_G") == 0 || - s.compare (0, 4, "??_E") == 0) - return nullptr; - - return strip (s); - }; - - if (const char* v = filter (s)) - os << " " << v << '\n'; - } - - // Note that it's not easy to import data without a dllimport - // declaration. - // - if (true) - { - auto filter = [&strip] (const string& s) -> const char* - { - if (s.compare (0, 4, "??_R") == 0 || - s.compare (0, 4, "??_C") == 0) - return nullptr; - - return strip (s); - }; - - for (const string& s: syms.d) - if (const char* v = filter (s)) - os << " " << v << " DATA\n"; - - for (const string& s: syms.b) - if (const char* v = filter (s)) - os << " " << v << " DATA\n"; - - // Read-only data contains an especially large number of various - // special symbols. Instead of trying to filter them out case by - // case, we will try to recognize C/C++ identifiers plus the - // special symbols that we need to export (e.g., vtable). - // - // - for (const string& s: syms.r) - { - if (extern_c (s) || // C - (s[0] == '?' 
&& s[1] != '?') || // C++ - s.compare (0, 4, "??_7") == 0) // vtable - { - os << " " << strip (s) << " DATA\n"; - } - } - } + if (tgt.system == "mingw32") + write_mingw32 (os, syms, i386); + else + write_win32_msvc (os, syms, i386); os.close (); rm.cancel (); diff --git a/libbuild2/bin/init.cxx b/libbuild2/bin/init.cxx index 7d1f171..02321c2 100644 --- a/libbuild2/bin/init.cxx +++ b/libbuild2/bin/init.cxx @@ -939,21 +939,6 @@ namespace build2 } } - // Register .def file rule. - // - if (lid == "msvc" || lid == "msvc-lld") - { - // If we are using link.exe, then we can access dumpbin via the - // link.exe /DUMP option. But for lld-link we need llvm-nm. - // - if (lid == "msvc-lld") - load_module (rs, bs, "bin.nm.config", loc, extra.hints); - - bs.insert_rule (perform_update_id, "bin.def", def_); - bs.insert_rule (perform_clean_id, "bin.def", def_); - bs.insert_rule (configure_update_id, "bin.def", def_); - } - return true; } @@ -1092,7 +1077,8 @@ namespace build2 // // Use the target to decide on the default nm name. Note that in case // of win32-msvc this is insufficient and we fallback to the linker - // type (if available) to decide between dumpbin and llvm-nm. + // type (if available) to decide between dumpbin and llvm-nm (with + // fallback to dumpbin). // // Finally note that the dumpbin.exe functionality is available via // link.exe /DUMP. @@ -1164,6 +1150,37 @@ namespace build2 return true; } + bool + def_init (scope& rs, + scope& bs, + const location& loc, + bool, + bool, + module_init_extra& extra) + { + tracer trace ("bin::def_init"); + l5 ([&]{trace << "for " << bs;}); + + // Make sure the bin core is loaded (def{} target type). We also load + // nm.config unless we are using MSVC link.exe and can access dumpbin + // via its /DUMP option. 
+ // + const string* lid (cast_null (rs["bin.ld.id"])); + + load_module (rs, bs, "bin", loc, extra.hints); + + if (lid == nullptr || *lid != "msvc") + load_module (rs, bs, "bin.nm.config", loc, extra.hints); + + // Register the def{} rule. + // + bs.insert_rule (perform_update_id, "bin.def", def_); + bs.insert_rule (perform_clean_id, "bin.def", def_); + bs.insert_rule (configure_update_id, "bin.def", def_); + + return true; + } + static const module_functions mod_functions[] = { // NOTE: don't forget to also update the documentation in init.hxx if @@ -1180,6 +1197,7 @@ namespace build2 {"bin.rc", nullptr, rc_init}, {"bin.nm.config", nullptr, nm_config_init}, {"bin.nm", nullptr, nm_init}, + {"bin.def", nullptr, def_init}, {nullptr, nullptr, nullptr} }; diff --git a/libbuild2/bin/init.hxx b/libbuild2/bin/init.hxx index 6b0db27..4eb0f10 100644 --- a/libbuild2/bin/init.hxx +++ b/libbuild2/bin/init.hxx @@ -25,14 +25,20 @@ namespace build2 // rules. // `bin.ar.config` -- loads bin.config and registers/sets more variables. // `bin.ar` -- loads bin and bin.ar.config. + // // `bin.ld.config` -- loads bin.config and registers/sets more variables. // `bin.ld` -- loads bin and bin.ld.config and registers more // target types for msvc. + // // `bin.rc.config` -- loads bin.config and registers/sets more variables. // `bin.rc` -- loads bin and bin.rc.config. + // // `bin.nm.config` -- loads bin.config and registers/sets more variables. // `bin.nm` -- loads bin and bin.nm.config. // + // `bin.def` -- loads bin, bin.nm.config unless using MSVC link.exe, + // and registers the .def file generation rule. + // extern "C" LIBBUILD2_BIN_SYMEXPORT const module_functions* build2_bin_load (); } diff --git a/libbuild2/cc/init.cxx b/libbuild2/cc/init.cxx index eae6d6d..f201d79 100644 --- a/libbuild2/cc/init.cxx +++ b/libbuild2/cc/init.cxx @@ -340,15 +340,17 @@ namespace build2 } } - // Load bin.*.config for bin.* modules we may need (see core_init() - // below). 
+ // Load bin.* modules we may need (see core_init() below). // const string& tsys (cast (rs["cc.target.system"])); load_module (rs, rs, "bin.ar.config", loc); if (tsys == "win32-msvc") + { load_module (rs, rs, "bin.ld.config", loc); + load_module (rs, rs, "bin.def", loc); + } if (tsys == "mingw32") load_module (rs, rs, "bin.rc.config", loc); -- cgit v1.1