diff options
Diffstat (limited to 'libbuild2/bin/def-rule.cxx')
-rw-r--r-- | libbuild2/bin/def-rule.cxx | 239 |
1 files changed, 186 insertions, 53 deletions
diff --git a/libbuild2/bin/def-rule.cxx b/libbuild2/bin/def-rule.cxx index ab31fde..143cc35 100644 --- a/libbuild2/bin/def-rule.cxx +++ b/libbuild2/bin/def-rule.cxx @@ -7,6 +7,7 @@ #include <libbuild2/scope.hxx> #include <libbuild2/target.hxx> #include <libbuild2/algorithm.hxx> +#include <libbuild2/filesystem.hxx> #include <libbuild2/diagnostics.hxx> #include <libbuild2/bin/target.hxx> @@ -16,17 +17,26 @@ namespace build2 { namespace bin { + // In C global uninitialized data becomes a "common symbol" (an equivalent + // definition compiled as C++ results in a BSS symbol) which allows some + // archaic merging of multiple such definitions during linking (see GNU ld + // --warn-common for background). Note that this merging may happen with + // other data symbol types, not just common. + // struct symbols { set<string> d; // data set<string> r; // read-only data set<string> b; // uninitialized data (BSS) + set<string> c; // common uninitialized data set<string> t; // text (code) }; static void - read_dumpbin (istream& is, symbols& syms) + read_dumpbin (diag_buffer& dbuf, ifdstream& is, symbols& syms) { + // Note: io_error is handled by the caller. + // Lines that describe symbols look like: // // 0 1 2 3 4 5 6 @@ -62,29 +72,29 @@ namespace build2 // B44 00000000 SECT4 notype Static | .rdata$r // AA2 00000000 SECT5 notype Static | .bss // - - // Map of read-only (.rdata, .xdata) and uninitialized (.bss) sections - // to their types (R and B, respectively). If a section is not found in - // this map, then it's assumed to be normal data (.data). + // Note that an UNDEF data symbol with non-zero OFFSET is a "common + // symbol", equivalent to the nm `C` type. // - map<string, char> sections; - - string l; - while (!eof (getline (is, l))) + // We keep a map of read-only (.rdata, .xdata) and uninitialized (.bss) + // sections to their types (R and B, respectively). If a section is not + // found in this map, then it's assumed to be normal data (.data). + // + auto parse_line = [&syms, + secs = map<string, char> ()] (const string& l) mutable { size_t b (0), e (0), n; // IDX (note that it can be more than 3 characters). // if (next_word (l, b, e) == 0) - continue; + return; // OFFSET (always 8 characters). // n = next_word (l, b, e); if (n != 8) - continue; + return; string off (l, b, n); @@ -92,8 +102,8 @@ namespace build2 // n = next_word (l, b, e); - if (n == 0 || l.compare (b, n, "UNDEF") == 0) - continue; + if (n == 0) + return; string sec (l, b, n); @@ -102,23 +112,23 @@ namespace build2 n = next_word (l, b, e); if (l.compare (b, n, "notype") != 0) - continue; + return; - bool d; + bool dat; if (l[e] == ' ' && l[e + 1] == '(' && l[e + 2] == ')') { e += 3; - d = false; + dat = false; } else - d = true; + dat = true; // VISIBILITY // n = next_word (l, b, e); if (n == 0) - continue; + return; string vis (l, b, n); @@ -127,20 +137,24 @@ namespace build2 n = next_word (l, b, e); if (n != 1 || l[b] != '|') - continue; + return; // SYMNAME // n = next_word (l, b, e); if (n == 0) - continue; + return; string s (l, b, n); // See if this is the section type symbol. // - if (d && off == "00000000" && vis == "Static" && s[0] == '.') + if (dat && + off == "00000000" && + sec != "UNDEF" && + vis == "Static" && + s[0] == '.') { auto cmp = [&s] (const char* n, size_t l) { @@ -148,43 +162,88 @@ namespace build2 }; if (cmp (".rdata", 6) || - cmp (".xdata", 6)) sections.emplace (move (sec), 'R'); - else if (cmp (".bss", 4)) sections.emplace (move (sec), 'B'); + cmp (".xdata", 6)) secs.emplace (move (sec), 'R'); + else if (cmp (".bss", 4)) secs.emplace (move (sec), 'B'); - continue; + return; } // We can only export extern symbols. // if (vis != "External") - continue; + return; - if (d) + if (dat) { - auto i (sections.find (sec)); - switch (i == sections.end () ? 'D' : i->second) + if (sec != "UNDEF") { - case 'D': syms.d.insert (move (s)); break; - case 'R': syms.r.insert (move (s)); break; - case 'B': syms.b.insert (move (s)); break; + auto i (secs.find (sec)); + switch (i == secs.end () ? 'D' : i->second) + { + case 'D': syms.d.insert (move (s)); break; + case 'R': syms.r.insert (move (s)); break; + case 'B': syms.b.insert (move (s)); break; + } + } + else + { + if (off != "00000000") + syms.c.insert (move (s)); } } else - syms.t.insert (move (s)); + { + if (sec != "UNDEF") + syms.t.insert (move (s)); + } + }; + + // Read until we reach EOF on all streams. + // + // Note that if dbuf is not opened, then we automatically get an + // inactive nullfd entry. + // + fdselect_set fds {is.fd (), dbuf.is.fd ()}; + fdselect_state& ist (fds[0]); + fdselect_state& dst (fds[1]); + + for (string l; ist.fd != nullfd || dst.fd != nullfd; ) + { + if (ist.fd != nullfd && getline_non_blocking (is, l)) + { + if (eof (is)) + ist.fd = nullfd; + else + { + parse_line (l); + l.clear (); + } + + continue; + } + + ifdselect (fds); + + if (dst.ready) + { + if (!dbuf.read ()) + dst.fd = nullfd; + } } } static void - read_posix_nm (istream& is, symbols& syms) + read_posix_nm (diag_buffer& dbuf, ifdstream& is, symbols& syms) { + // Note: io_error is handled by the caller. + // Lines that describe symbols look like: // // <NAME> <TYPE> <VALUE> <SIZE> // // The types that we are interested in are T, D, R, and B. // - string l; - while (!eof (getline (is, l))) + auto parse_line = [&syms] (const string& l) { size_t b (0), e (0), n; @@ -193,7 +252,7 @@ namespace build2 n = next_word (l, b, e); if (n == 0) - continue; + return; string s (l, b, n); @@ -202,15 +261,50 @@ namespace build2 n = next_word (l, b, e); if (n != 1) - continue; + return; switch (l[b]) { case 'D': syms.d.insert (move (s)); break; case 'R': syms.r.insert (move (s)); break; case 'B': syms.b.insert (move (s)); break; + case 'c': + case 'C': syms.c.insert (move (s)); break; case 'T': syms.t.insert (move (s)); break; } + }; + + // Read until we reach EOF on all streams. + // + // Note that if dbuf is not opened, then we automatically get an + // inactive nullfd entry. + // + fdselect_set fds {is.fd (), dbuf.is.fd ()}; + fdselect_state& ist (fds[0]); + fdselect_state& dst (fds[1]); + + for (string l; ist.fd != nullfd || dst.fd != nullfd; ) + { + if (ist.fd != nullfd && getline_non_blocking (is, l)) + { + if (eof (is)) + ist.fd = nullfd; + else + { + parse_line (l); + l.clear (); + } + + continue; + } + + ifdselect (fds); + + if (dst.ready) + { + if (!dbuf.read ()) + dst.fd = nullfd; + } } } @@ -311,11 +405,20 @@ namespace build2 if (const char* v = filter (s)) os << " " << v << " DATA\n"; + // For common symbols, only write extern C. + // + for (const string& s: syms.c) + if (extern_c (s)) + if (const char* v = filter (s)) + os << " " << v << " DATA\n"; + // Read-only data contains an especially large number of various // special symbols. Instead of trying to filter them out case by case, // we will try to recognize C/C++ identifiers plus the special symbols // that we need to export (e.g., vtable). // + // Note that it looks like rdata should not be declared DATA. It is + // known to break ??_7 (vtable) exporting (see GH issue 315). // for (const string& s: syms.r) { @@ -323,7 +426,7 @@ namespace build2 (s[0] == '?' && s[1] != '?') || // C++ s.compare (0, 4, "??_7") == 0) // vtable { - os << " " << strip (s) << " DATA\n"; + os << " " << strip (s) << '\n'; } } } @@ -386,11 +489,21 @@ namespace build2 if (const char* v = filter (s)) os << " " << v << " DATA\n"; + for (const string& s: syms.c) + if (const char* v = filter (s)) + os << " " << v << " DATA\n"; + // Read-only data contains an especially large number of various // special symbols. Instead of trying to filter them out case by case, // we will try to recognize C/C++ identifiers plus the special symbols // that we need to export (e.g., vtable and typeinfo). // + // For the description of GNU binutils .def format, see: + // + // https://sourceware.org/binutils/docs/binutils/def-file-format.html + // + // @@ Maybe CONSTANT is more appropriate than DATA? + // for (const string& s: syms.r) { if (s.find_first_of (".") != string::npos) // Special (.refptr.*) @@ -411,7 +524,7 @@ namespace build2 } bool def_rule:: - match (action a, target& t, const string&) const + match (action a, target& t) const { tracer trace ("bin::def_rule::match"); @@ -615,8 +728,12 @@ namespace build2 const char*& arg (*(args.end () - 2)); + // We could print the prerequisite if it's a single obj{}/libu{} (with + // the latter being the common case). But it doesn't feel like that's + // worth the variability and the associated possibility of confusion. + // if (verb == 1) - text << "def " << t; + print_diag ("def", t); // Extract symbols from each object file. // @@ -636,22 +753,37 @@ namespace build2 // Both dumpbin.exe and nm send their output to stdout. While nm sends // diagnostics to stderr, dumpbin sends it to stdout together with the - // output. + // output. To keep things uniform we will buffer stderr in both cases. // - process pr (run_start (nm, - args, - 0 /* stdin */, - -1 /* stdout */)); + process pr ( + run_start (nm, + args, + 0 /* stdin */, + -1 /* stdout */, + diag_buffer::pipe (ctx) /* stderr */)); + + // Note that while we read both streams until eof in the normal + // circumstances, we cannot use fdstream_mode::skip for the exception + // case on both of them: we may end up being blocked trying to read + // one stream while the process may be blocked writing to the other. + // So in case of an exception we only skip the diagnostics and close + // stdout hard. The latter should happen first so the order of the + // dbuf/is variables is important. + // + diag_buffer dbuf (ctx, args[0], pr, (fdstream_mode::non_blocking | + fdstream_mode::skip)); + bool io (false); try { - ifdstream is ( - move (pr.in_ofd), fdstream_mode::skip, ifdstream::badbit); + ifdstream is (move (pr.in_ofd), + fdstream_mode::non_blocking, + ifdstream::badbit); if (lid == "msvc" || nid == "msvc") - read_dumpbin (is, syms); + read_dumpbin (dbuf, is, syms); else - read_posix_nm (is, syms); + read_posix_nm (dbuf, is, syms); is.close (); } @@ -663,16 +795,17 @@ namespace build2 io = true; } - if (!run_finish_code (args.data (), pr) || io) + if (!run_finish_code (dbuf, args, pr, 1 /* verbosity */) || io) fail << "unable to extract symbols from " << arg; } - /* +#if 0 for (const string& s: syms.d) text << "D " << s; for (const string& s: syms.r) text << "R " << s; for (const string& s: syms.b) text << "B " << s; + for (const string& s: syms.c) text << "C " << s; for (const string& s: syms.t) text << "T " << s; - */ +#endif if (verb >= 3) text << "cat >" << tp; @@ -712,6 +845,6 @@ namespace build2 return target_state::changed; } - const string def_rule::rule_id_ {"bin.def 1"}; + const string def_rule::rule_id_ {"bin.def 2"}; } } |