aboutsummaryrefslogtreecommitdiff
path: root/libbuild2/bin/def-rule.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'libbuild2/bin/def-rule.cxx')
-rw-r--r--libbuild2/bin/def-rule.cxx239
1 files changed, 186 insertions, 53 deletions
diff --git a/libbuild2/bin/def-rule.cxx b/libbuild2/bin/def-rule.cxx
index ab31fde..143cc35 100644
--- a/libbuild2/bin/def-rule.cxx
+++ b/libbuild2/bin/def-rule.cxx
@@ -7,6 +7,7 @@
#include <libbuild2/scope.hxx>
#include <libbuild2/target.hxx>
#include <libbuild2/algorithm.hxx>
+#include <libbuild2/filesystem.hxx>
#include <libbuild2/diagnostics.hxx>
#include <libbuild2/bin/target.hxx>
@@ -16,17 +17,26 @@ namespace build2
{
namespace bin
{
+ // In C global uninitialized data becomes a "common symbol" (an equivalent
+ // definition compiled as C++ results in a BSS symbol) which allows some
+ // archaic merging of multiple such definitions during linking (see GNU ld
+ // --warn-common for background). Note that this merging may happen with
+ // other data symbol types, not just common.
+ //
// Names of the symbols read from the dumpbin/nm output, grouped by type.
struct symbols
{
set<string> d; // data (nm `D`; dumpbin data symbol in a section not known as R/B)
set<string> r; // read-only data (nm `R`; dumpbin .rdata/.xdata sections)
set<string> b; // uninitialized data (BSS) (nm `B`; dumpbin .bss sections)
+ set<string> c; // common uninitialized data (nm `C`/`c`; dumpbin UNDEF data symbol with non-zero OFFSET)
set<string> t; // text (code) (nm `T`; dumpbin `notype ()` symbol in a defined section)
};
static void
- read_dumpbin (istream& is, symbols& syms)
+ read_dumpbin (diag_buffer& dbuf, ifdstream& is, symbols& syms)
{
+ // Note: io_error is handled by the caller.
+
// Lines that describe symbols look like:
//
// 0 1 2 3 4 5 6
@@ -62,29 +72,29 @@ namespace build2
// B44 00000000 SECT4 notype Static | .rdata$r
// AA2 00000000 SECT5 notype Static | .bss
//
-
- // Map of read-only (.rdata, .xdata) and uninitialized (.bss) sections
- // to their types (R and B, respectively). If a section is not found in
- // this map, then it's assumed to be normal data (.data).
+ // Note that an UNDEF data symbol with non-zero OFFSET is a "common
+ // symbol", equivalent to the nm `C` type.
//
- map<string, char> sections;
-
- string l;
- while (!eof (getline (is, l)))
+ // We keep a map of read-only (.rdata, .xdata) and uninitialized (.bss)
+ // sections to their types (R and B, respectively). If a section is not
+ // found in this map, then it's assumed to be normal data (.data).
+ //
+ auto parse_line = [&syms,
+ secs = map<string, char> ()] (const string& l) mutable
{
size_t b (0), e (0), n;
// IDX (note that it can be more than 3 characters).
//
if (next_word (l, b, e) == 0)
- continue;
+ return;
// OFFSET (always 8 characters).
//
n = next_word (l, b, e);
if (n != 8)
- continue;
+ return;
string off (l, b, n);
@@ -92,8 +102,8 @@ namespace build2
//
n = next_word (l, b, e);
- if (n == 0 || l.compare (b, n, "UNDEF") == 0)
- continue;
+ if (n == 0)
+ return;
string sec (l, b, n);
@@ -102,23 +112,23 @@ namespace build2
n = next_word (l, b, e);
if (l.compare (b, n, "notype") != 0)
- continue;
+ return;
- bool d;
+ bool dat;
if (l[e] == ' ' && l[e + 1] == '(' && l[e + 2] == ')')
{
e += 3;
- d = false;
+ dat = false;
}
else
- d = true;
+ dat = true;
// VISIBILITY
//
n = next_word (l, b, e);
if (n == 0)
- continue;
+ return;
string vis (l, b, n);
@@ -127,20 +137,24 @@ namespace build2
n = next_word (l, b, e);
if (n != 1 || l[b] != '|')
- continue;
+ return;
// SYMNAME
//
n = next_word (l, b, e);
if (n == 0)
- continue;
+ return;
string s (l, b, n);
// See if this is the section type symbol.
//
- if (d && off == "00000000" && vis == "Static" && s[0] == '.')
+ if (dat &&
+ off == "00000000" &&
+ sec != "UNDEF" &&
+ vis == "Static" &&
+ s[0] == '.')
{
auto cmp = [&s] (const char* n, size_t l)
{
@@ -148,43 +162,88 @@ namespace build2
};
if (cmp (".rdata", 6) ||
- cmp (".xdata", 6)) sections.emplace (move (sec), 'R');
- else if (cmp (".bss", 4)) sections.emplace (move (sec), 'B');
+ cmp (".xdata", 6)) secs.emplace (move (sec), 'R');
+ else if (cmp (".bss", 4)) secs.emplace (move (sec), 'B');
- continue;
+ return;
}
// We can only export extern symbols.
//
if (vis != "External")
- continue;
+ return;
- if (d)
+ if (dat)
{
- auto i (sections.find (sec));
- switch (i == sections.end () ? 'D' : i->second)
+ if (sec != "UNDEF")
{
- case 'D': syms.d.insert (move (s)); break;
- case 'R': syms.r.insert (move (s)); break;
- case 'B': syms.b.insert (move (s)); break;
+ auto i (secs.find (sec));
+ switch (i == secs.end () ? 'D' : i->second)
+ {
+ case 'D': syms.d.insert (move (s)); break;
+ case 'R': syms.r.insert (move (s)); break;
+ case 'B': syms.b.insert (move (s)); break;
+ }
+ }
+ else
+ {
+ if (off != "00000000")
+ syms.c.insert (move (s));
}
}
else
- syms.t.insert (move (s));
+ {
+ if (sec != "UNDEF")
+ syms.t.insert (move (s));
+ }
+ };
+
+ // Read until we reach EOF on all streams.
+ //
+ // Note that if dbuf is not opened, then we automatically get an
+ // inactive nullfd entry.
+ //
+ fdselect_set fds {is.fd (), dbuf.is.fd ()};
+ fdselect_state& ist (fds[0]);
+ fdselect_state& dst (fds[1]);
+
+ for (string l; ist.fd != nullfd || dst.fd != nullfd; )
+ {
+ if (ist.fd != nullfd && getline_non_blocking (is, l))
+ {
+ if (eof (is))
+ ist.fd = nullfd;
+ else
+ {
+ parse_line (l);
+ l.clear ();
+ }
+
+ continue;
+ }
+
+ ifdselect (fds);
+
+ if (dst.ready)
+ {
+ if (!dbuf.read ())
+ dst.fd = nullfd;
+ }
}
}
static void
- read_posix_nm (istream& is, symbols& syms)
+ read_posix_nm (diag_buffer& dbuf, ifdstream& is, symbols& syms)
{
+ // Note: io_error is handled by the caller.
+
// Lines that describe symbols look like:
//
// <NAME> <TYPE> <VALUE> <SIZE>
//
// The types that we are interested in are T, D, R, and B.
//
- string l;
- while (!eof (getline (is, l)))
+ auto parse_line = [&syms] (const string& l)
{
size_t b (0), e (0), n;
@@ -193,7 +252,7 @@ namespace build2
n = next_word (l, b, e);
if (n == 0)
- continue;
+ return;
string s (l, b, n);
@@ -202,15 +261,50 @@ namespace build2
n = next_word (l, b, e);
if (n != 1)
- continue;
+ return;
switch (l[b])
{
case 'D': syms.d.insert (move (s)); break;
case 'R': syms.r.insert (move (s)); break;
case 'B': syms.b.insert (move (s)); break;
+ case 'c':
+ case 'C': syms.c.insert (move (s)); break;
case 'T': syms.t.insert (move (s)); break;
}
+ };
+
+ // Read until we reach EOF on all streams.
+ //
+ // Note that if dbuf is not opened, then we automatically get an
+ // inactive nullfd entry.
+ //
+ fdselect_set fds {is.fd (), dbuf.is.fd ()};
+ fdselect_state& ist (fds[0]);
+ fdselect_state& dst (fds[1]);
+
+ for (string l; ist.fd != nullfd || dst.fd != nullfd; )
+ {
+ if (ist.fd != nullfd && getline_non_blocking (is, l))
+ {
+ if (eof (is))
+ ist.fd = nullfd;
+ else
+ {
+ parse_line (l);
+ l.clear ();
+ }
+
+ continue;
+ }
+
+ ifdselect (fds);
+
+ if (dst.ready)
+ {
+ if (!dbuf.read ())
+ dst.fd = nullfd;
+ }
}
}
@@ -311,11 +405,20 @@ namespace build2
if (const char* v = filter (s))
os << " " << v << " DATA\n";
+ // For common symbols, only write extern C.
+ //
+ for (const string& s: syms.c)
+ if (extern_c (s))
+ if (const char* v = filter (s))
+ os << " " << v << " DATA\n";
+
// Read-only data contains an especially large number of various
// special symbols. Instead of trying to filter them out case by case,
// we will try to recognize C/C++ identifiers plus the special symbols
// that we need to export (e.g., vtable).
//
+ // Note that it looks like rdata should not be declared DATA. It is
+ // known to break ??_7 (vtable) exporting (see GH issue 315).
//
for (const string& s: syms.r)
{
@@ -323,7 +426,7 @@ namespace build2
(s[0] == '?' && s[1] != '?') || // C++
s.compare (0, 4, "??_7") == 0) // vtable
{
- os << " " << strip (s) << " DATA\n";
+ os << " " << strip (s) << '\n';
}
}
}
@@ -386,11 +489,21 @@ namespace build2
if (const char* v = filter (s))
os << " " << v << " DATA\n";
+ for (const string& s: syms.c)
+ if (const char* v = filter (s))
+ os << " " << v << " DATA\n";
+
// Read-only data contains an especially large number of various
// special symbols. Instead of trying to filter them out case by case,
// we will try to recognize C/C++ identifiers plus the special symbols
// that we need to export (e.g., vtable and typeinfo).
//
+ // For the description of GNU binutils .def format, see:
+ //
+ // https://sourceware.org/binutils/docs/binutils/def-file-format.html
+ //
+ // @@ Maybe CONSTANT is more appropriate than DATA?
+ //
for (const string& s: syms.r)
{
if (s.find_first_of (".") != string::npos) // Special (.refptr.*)
@@ -411,7 +524,7 @@ namespace build2
}
bool def_rule::
- match (action a, target& t, const string&) const
+ match (action a, target& t) const
{
tracer trace ("bin::def_rule::match");
@@ -615,8 +728,12 @@ namespace build2
const char*& arg (*(args.end () - 2));
+ // We could print the prerequisite if it's a single obj{}/libu{} (with
+ // the latter being the common case). But it doesn't feel like that's
+ // worth the variability and the associated possibility of confusion.
+ //
if (verb == 1)
- text << "def " << t;
+ print_diag ("def", t);
// Extract symbols from each object file.
//
@@ -636,22 +753,37 @@ namespace build2
// Both dumpbin.exe and nm send their output to stdout. While nm sends
// diagnostics to stderr, dumpbin sends it to stdout together with the
- // output.
+ // output. To keep things uniform we will buffer stderr in both cases.
//
- process pr (run_start (nm,
- args,
- 0 /* stdin */,
- -1 /* stdout */));
+ process pr (
+ run_start (nm,
+ args,
+ 0 /* stdin */,
+ -1 /* stdout */,
+ diag_buffer::pipe (ctx) /* stderr */));
+
+ // Note that while we read both streams until eof in the normal
+ // circumstances, we cannot use fdstream_mode::skip for the exception
+ // case on both of them: we may end up being blocked trying to read
+ // one stream while the process may be blocked writing to the other.
+ // So in case of an exception we only skip the diagnostics and close
+ // stdout hard. The latter should happen first so the order of the
+ // dbuf/is variables is important.
+ //
+ diag_buffer dbuf (ctx, args[0], pr, (fdstream_mode::non_blocking |
+ fdstream_mode::skip));
+
bool io (false);
try
{
- ifdstream is (
- move (pr.in_ofd), fdstream_mode::skip, ifdstream::badbit);
+ ifdstream is (move (pr.in_ofd),
+ fdstream_mode::non_blocking,
+ ifdstream::badbit);
if (lid == "msvc" || nid == "msvc")
- read_dumpbin (is, syms);
+ read_dumpbin (dbuf, is, syms);
else
- read_posix_nm (is, syms);
+ read_posix_nm (dbuf, is, syms);
is.close ();
}
@@ -663,16 +795,17 @@ namespace build2
io = true;
}
- if (!run_finish_code (args.data (), pr) || io)
+ if (!run_finish_code (dbuf, args, pr, 1 /* verbosity */) || io)
fail << "unable to extract symbols from " << arg;
}
- /*
+#if 0
for (const string& s: syms.d) text << "D " << s;
for (const string& s: syms.r) text << "R " << s;
for (const string& s: syms.b) text << "B " << s;
+ for (const string& s: syms.c) text << "C " << s;
for (const string& s: syms.t) text << "T " << s;
- */
+#endif
if (verb >= 3)
text << "cat >" << tp;
@@ -712,6 +845,6 @@ namespace build2
return target_state::changed;
}
- const string def_rule::rule_id_ {"bin.def 1"};
+ const string def_rule::rule_id_ {"bin.def 2"};
}
}