1 files changed, 186 insertions, 53 deletions
diff --git a/libbuild2/bin/def-rule.cxx b/libbuild2/bin/def-rule.cxx
index ab31fde..143cc35 100644
--- a/libbuild2/bin/def-rule.cxx
+++ b/libbuild2/bin/def-rule.cxx
@@ -7,6 +7,7 @@
 #include <libbuild2/scope.hxx>
 #include <libbuild2/target.hxx>
 #include <libbuild2/algorithm.hxx>
+#include <libbuild2/filesystem.hxx>
 #include <libbuild2/diagnostics.hxx>
 
 #include <libbuild2/bin/target.hxx>
@@ -16,17 +17,26 @@ namespace build2
 {
   namespace bin
   {
+    // In C global uninitialized data becomes a "common symbol" (an equivalent
+    // definition compiled as C++ results in a BSS symbol) which allows some
+    // archaic merging of multiple such definitions during linking (see GNU ld
+    // --warn-common for background). Note that this merging may happen with
+    // other data symbol types, not just common.
+    //
     struct symbols
     {
       set<string> d; // data
       set<string> r; // read-only data
       set<string> b; // uninitialized data (BSS)
+      set<string> c; // common uninitialized data
       set<string> t; // text (code)
     };
 
     static void
-    read_dumpbin (istream& is, symbols& syms)
+    read_dumpbin (diag_buffer& dbuf, ifdstream& is, symbols& syms)
     {
+      // Note: io_error is handled by the caller.
+
       // Lines that describe symbols look like:
       //
       // 0   1        2      3          4            5 6
@@ -62,29 +72,29 @@ namespace build2
       // B44 00000000 SECT4  notype    Static       | .rdata$r
       // AA2 00000000 SECT5  notype    Static       | .bss
       //
-
-      // Map of read-only (.rdata, .xdata) and uninitialized (.bss) sections
-      // to their types (R and B, respectively). If a section is not found in
-      // this map, then it's assumed to be normal data (.data).
+      // Note that an UNDEF data symbol with non-zero OFFSET is a "common
+      // symbol", equivalent to the nm `C` type.
       //
-      map<string, char> sections;
-
-      string l;
-      while (!eof (getline (is, l)))
+      // We keep a map of read-only (.rdata, .xdata) and uninitialized (.bss)
+      // sections to their types (R and B, respectively). If a section is not
+      // found in this map, then it's assumed to be normal data (.data).
+      //
+      auto parse_line = [&syms,
+                         secs = map<string, char> ()] (const string& l) mutable
       {
         size_t b (0), e (0), n;
 
         // IDX (note that it can be more than 3 characters).
         //
         if (next_word (l, b, e) == 0)
-          continue;
+          return;
 
         // OFFSET (always 8 characters).
         //
         n = next_word (l, b, e);
 
         if (n != 8)
-          continue;
+          return;
 
         string off (l, b, n);
 
@@ -92,8 +102,8 @@ namespace build2
         //
         n = next_word (l, b, e);
 
-        if (n == 0 || l.compare (b, n, "UNDEF") == 0)
-          continue;
+        if (n == 0)
+          return;
 
         string sec (l, b, n);
 
@@ -102,23 +112,23 @@ namespace build2
         n = next_word (l, b, e);
 
         if (l.compare (b, n, "notype") != 0)
-          continue;
+          return;
 
-        bool d;
+        bool dat;
         if (l[e] == ' ' && l[e + 1] == '(' && l[e + 2] == ')')
         {
           e += 3;
-          d = false;
+          dat = false;
         }
         else
-          d = true;
+          dat = true;
 
         // VISIBILITY
         //
         n = next_word (l, b, e);
 
         if (n == 0)
-          continue;
+          return;
 
         string vis (l, b, n);
 
@@ -127,20 +137,24 @@ namespace build2
         n = next_word (l, b, e);
 
         if (n != 1 || l[b] != '|')
-          continue;
+          return;
 
         // SYMNAME
         //
         n = next_word (l, b, e);
 
         if (n == 0)
-          continue;
+          return;
 
         string s (l, b, n);
 
         // See if this is the section type symbol.
         //
-        if (d && off == "00000000" && vis == "Static" && s[0] == '.')
+        if (dat &&
+            off == "00000000" &&
+            sec != "UNDEF"    &&
+            vis == "Static"   &&
+            s[0] == '.')
         {
           auto cmp = [&s] (const char* n, size_t l)
           {
@@ -148,43 +162,88 @@ namespace build2
           };
 
           if      (cmp (".rdata", 6) ||
-                   cmp (".xdata", 6))    sections.emplace (move (sec), 'R');
-          else if (cmp (".bss",   4))    sections.emplace (move (sec), 'B');
+                   cmp (".xdata", 6))    secs.emplace (move (sec), 'R');
+          else if (cmp (".bss",   4))    secs.emplace (move (sec), 'B');
 
-          continue;
+          return;
         }
 
         // We can only export extern symbols.
         //
         if (vis != "External")
-          continue;
+          return;
 
-        if (d)
+        if (dat)
         {
-          auto i (sections.find (sec));
-          switch (i == sections.end () ? 'D' : i->second)
+          if (sec != "UNDEF")
           {
-          case 'D': syms.d.insert (move (s)); break;
-          case 'R': syms.r.insert (move (s)); break;
-          case 'B': syms.b.insert (move (s)); break;
+            auto i (secs.find (sec));
+            switch (i == secs.end () ? 'D' : i->second)
+            {
+            case 'D': syms.d.insert (move (s)); break;
+            case 'R': syms.r.insert (move (s)); break;
+            case 'B': syms.b.insert (move (s)); break;
+            }
+          }
+          else
+          {
+            if (off != "00000000")
+              syms.c.insert (move (s));
           }
         }
         else
-          syms.t.insert (move (s));
+        {
+          if (sec != "UNDEF")
+            syms.t.insert (move (s));
+        }
+      };
+
+      // Read until we reach EOF on all streams.
+      //
+      // Note that if dbuf is not opened, then we automatically get an
+      // inactive nullfd entry.
+      //
+      fdselect_set fds {is.fd (), dbuf.is.fd ()};
+      fdselect_state& ist (fds[0]);
+      fdselect_state& dst (fds[1]);
+
+      for (string l; ist.fd != nullfd || dst.fd != nullfd; )
+      {
+        if (ist.fd != nullfd && getline_non_blocking (is, l))
+        {
+          if (eof (is))
+            ist.fd = nullfd;
+          else
+          {
+            parse_line (l);
+            l.clear ();
+          }
+
+          continue;
+        }
+
+        ifdselect (fds);
+
+        if (dst.ready)
+        {
+          if (!dbuf.read ())
+            dst.fd = nullfd;
+        }
       }
     }
 
     static void
-    read_posix_nm (istream& is, symbols& syms)
+    read_posix_nm (diag_buffer& dbuf, ifdstream& is, symbols& syms)
     {
+      // Note: io_error is handled by the caller.
+
       // Lines that describe symbols look like:
       //
       // <NAME> <TYPE> <VALUE> <SIZE>
       //
       // The types that we are interested in are T, D, R, and B.
       //
-      string l;
-      while (!eof (getline (is, l)))
+      auto parse_line = [&syms] (const string& l)
       {
         size_t b (0), e (0), n;
 
@@ -193,7 +252,7 @@ namespace build2
         n = next_word (l, b, e);
 
         if (n == 0)
-          continue;
+          return;
 
         string s (l, b, n);
 
@@ -202,15 +261,50 @@ namespace build2
         n = next_word (l, b, e);
 
         if (n != 1)
-          continue;
+          return;
 
         switch (l[b])
         {
         case 'D': syms.d.insert (move (s)); break;
         case 'R': syms.r.insert (move (s)); break;
         case 'B': syms.b.insert (move (s)); break;
+        case 'c':
+        case 'C': syms.c.insert (move (s)); break;
         case 'T': syms.t.insert (move (s)); break;
         }
+      };
+
+      // Read until we reach EOF on all streams.
+      //
+      // Note that if dbuf is not opened, then we automatically get an
+      // inactive nullfd entry.
+      //
+      fdselect_set fds {is.fd (), dbuf.is.fd ()};
+      fdselect_state& ist (fds[0]);
+      fdselect_state& dst (fds[1]);
+
+      for (string l; ist.fd != nullfd || dst.fd != nullfd; )
+      {
+        if (ist.fd != nullfd && getline_non_blocking (is, l))
+        {
+          if (eof (is))
+            ist.fd = nullfd;
+          else
+          {
+            parse_line (l);
+            l.clear ();
+          }
+
+          continue;
+        }
+
+        ifdselect (fds);
+
+        if (dst.ready)
+        {
+          if (!dbuf.read ())
+            dst.fd = nullfd;
+        }
       }
     }
 
@@ -311,11 +405,20 @@ namespace build2
           if (const char* v = filter (s))
             os << "  " << v << " DATA\n";
 
+        // For common symbols, only write extern C.
+        //
+        for (const string& s: syms.c)
+          if (extern_c (s))
+            if (const char* v = filter (s))
+              os << "  " << v << " DATA\n";
+
         // Read-only data contains an especially large number of various
         // special symbols. Instead of trying to filter them out case by case,
         // we will try to recognize C/C++ identifiers plus the special symbols
         // that we need to export (e.g., vtable).
         //
+        // Note that it looks like rdata should not be declared DATA: doing
+        // so is known to break ??_7 (vtable) exporting (see GH issue 315).
         //
         for (const string& s: syms.r)
         {
@@ -323,7 +426,7 @@ namespace build2
               (s[0] == '?' && s[1] != '?') || // C++
               s.compare (0, 4, "??_7") == 0)  // vtable
           {
-            os << "  " << strip (s) << " DATA\n";
+            os << "  " << strip (s) << '\n';
           }
         }
       }
@@ -386,11 +489,21 @@ namespace build2
           if (const char* v = filter (s))
             os << "  " << v << " DATA\n";
 
+        for (const string& s: syms.c)
+          if (const char* v = filter (s))
+            os << "  " << v << " DATA\n";
+
         // Read-only data contains an especially large number of various
         // special symbols. Instead of trying to filter them out case by case,
         // we will try to recognize C/C++ identifiers plus the special symbols
         // that we need to export (e.g., vtable and typeinfo).
         //
+        // For the description of GNU binutils .def format, see:
+        //
+        // https://sourceware.org/binutils/docs/binutils/def-file-format.html
+        //
+        // @@ Maybe CONSTANT is more appropriate than DATA?
+        //
         for (const string& s: syms.r)
         {
           if (s.find_first_of (".") != string::npos) // Special (.refptr.*)
@@ -411,7 +524,7 @@ namespace build2
     }
 
     bool def_rule::
-    match (action a, target& t, const string&) const
+    match (action a, target& t) const
     {
       tracer trace ("bin::def_rule::match");
 
@@ -615,8 +728,12 @@ namespace build2
 
       const char*& arg (*(args.end () - 2));
 
+      // We could print the prerequisite if it's a single obj{}/libu{} (with
+      // the latter being the common case). But it doesn't feel like that's
+      // worth the variability and the associated possibility of confusion.
+      //
       if (verb == 1)
-        text << "def " << t;
+        print_diag ("def", t);
 
       // Extract symbols from each object file.
       //
@@ -636,22 +753,37 @@ namespace build2
 
         // Both dumpbin.exe and nm send their output to stdout. While nm sends
         // diagnostics to stderr, dumpbin sends it to stdout together with the
-        // output.
+        // output. To keep things uniform we will buffer stderr in both cases.
         //
-        process pr (run_start (nm,
-                               args,
-                               0     /* stdin */,
-                               -1    /* stdout */));
+        process pr (
+          run_start (nm,
+                     args,
+                     0                       /* stdin */,
+                     -1                      /* stdout */,
+                     diag_buffer::pipe (ctx) /* stderr */));
+
+        // Note that while we read both streams until EOF under normal
+        // circumstances, we cannot use fdstream_mode::skip for the exception
+        // case on both of them: we may end up being blocked trying to read
+        // one stream while the process may be blocked writing to the other.
+        // So in case of an exception we only skip the diagnostics and close
+        // stdout hard. The latter should happen first so the order of the
+        // dbuf/is variables is important.
+        //
+        diag_buffer dbuf (ctx, args[0], pr, (fdstream_mode::non_blocking |
+                                             fdstream_mode::skip));
+
         bool io (false);
         try
         {
-          ifdstream is (
-            move (pr.in_ofd), fdstream_mode::skip, ifdstream::badbit);
+          ifdstream is (move (pr.in_ofd),
+                        fdstream_mode::non_blocking,
+                        ifdstream::badbit);
 
           if (lid == "msvc" || nid == "msvc")
-            read_dumpbin (is, syms);
+            read_dumpbin (dbuf, is, syms);
           else
-            read_posix_nm (is, syms);
+            read_posix_nm (dbuf, is, syms);
 
           is.close ();
         }
@@ -663,16 +795,17 @@ namespace build2
           io = true;
         }
 
-        if (!run_finish_code (args.data (), pr) || io)
+        if (!run_finish_code (dbuf, args, pr, 1 /* verbosity */) || io)
           fail << "unable to extract symbols from " << arg;
       }
 
-      /*
+#if 0
       for (const string& s: syms.d) text << "D " << s;
       for (const string& s: syms.r) text << "R " << s;
       for (const string& s: syms.b) text << "B " << s;
+      for (const string& s: syms.c) text << "C " << s;
       for (const string& s: syms.t) text << "T " << s;
-      */
+#endif
 
       if (verb >= 3)
         text << "cat >" << tp;
@@ -712,6 +845,6 @@ namespace build2
       return target_state::changed;
     }
 
-    const string def_rule::rule_id_ {"bin.def 1"};
+    const string def_rule::rule_id_ {"bin.def 2"};
   }
 }