// file : libbuild2/bin/guess.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file

#include <libbuild2/bin/guess.hxx>

#include <libbuild2/diagnostics.hxx>

using namespace std;

namespace build2
{
  namespace bin
  {
    // Result of trying to recognize a tool from a line of its output.
    //
    // The id is the tool kind ("gnu", "llvm", "msvc", etc) with empty id
    // meaning "not recognized" (see empty()). The signature is the line that
    // matched (or a fixed placeholder string if the tool was recognized by
    // name). The checksum is not set by the constructors; the callers below
    // assign it from the sha256 object passed to run(). The version is
    // optional since not every tool's version is (or can be) extracted.
    //
    struct guess_result
    {
      string id;
      string signature;
      string checksum;
      optional<semantic_version> version;

      guess_result () = default;

      guess_result (string&& i, string&& s, semantic_version&& v)
        : id (move (i)), signature (move (s)), version (move (v)) {}

      guess_result (string&& i, string&& s, optional<semantic_version>&& v)
        : id (move (i)), signature (move (s)), version (move (v)) {}

      // Return true if nothing has been recognized.
      //
      bool
      empty () const {return id.empty ();}
    };

    // Try to parse a semantic-like version from the specified position.
    // Return 0-version if the version is invalid.
    //
    static inline semantic_version
    parse_version (const string& s, size_t p = 0, const char* bs = ".-+~ ")
    {
      optional<semantic_version> v (parse_semantic_version (s, p, bs));
      return v ? *v : semantic_version ();
    }

    // Search for a program first in paths if not NULL and then using the
    // standard path search semantics. Use var to suggest an override if the
    // search fails.
    //
    // Only search in PATH (specifically, omitting the current executable's
    // directory on Windows).
    //
    static process_path
    search (const path& prog, const char* paths, const char* var)
    {
      if (paths != nullptr)
      {
        process_path r (
          run_try_search (prog,
                          false       /* init (cached) */,
                          dir_path () /* fallback */,
                          true        /* path_only */,
                          paths));

        if (!r.empty ())
        {
          // Clear the recall path since we found it in custom search paths.
          // An alternative would have been to also do a search in PATH and if
          // the two effective paths are the same (which means, this program
          // is also in PATH), keep the recall. The benefit of this approach
          // is that we will have tidier command lines without long absolute
          // paths. The drawback is the extra complexity (we would need to
          // normalize the paths, etc). Let's keep it simple for now.
          //
          r.clear_recall ();
          return r;
        }
      }

      // Note that the override hint is only set up for the fallback search
      // below (the custom paths search above is a "try" search).
      //
      auto df = make_diag_frame (
        [var](const diag_record& dr)
        {
          dr << info << "use " << var << " to override";
        });

      return run_search (prog, false, dir_path (), true);
    }

    // Extracting ar/ranlib information requires running them which can become
    // expensive if done repeatedly. So we cache the result.
    //
    static global_cache<ar_info> ar_cache;

    // Guess the ar (and, if rl is not NULL, ranlib) signature by running the
    // program(s) and matching their output against known banners. The
    // programs are searched for first in paths, if not NULL (see search()
    // above). The result is cached with the key derived from the ar and
    // ranlib paths as well as the search paths. Issue diagnostics and fail if
    // unable to guess.
    //
    const ar_info&
    guess_ar (const path& ar, const path* rl, const char* paths)
    {
      tracer trace ("bin::guess_ar");

      // First check the cache.
      //
      string key;
      {
        sha256 cs;
        cs.append (ar.string ());
        if (rl != nullptr) cs.append (rl->string ());
        if (paths != nullptr) cs.append (paths);
        key = cs.string ();

        if (const ar_info* r = ar_cache.find (key))
          return *r;
      }

      guess_result arr, rlr;

      process_path arp (search (ar, paths, "config.bin.ar"));
      process_path rlp (rl != nullptr
                        ? search (*rl, paths, "config.bin.ranlib")
                        : process_path ());

      // We should probably assume the utility output language words can be
      // translated and even rearranged. Thus pass LC_ALL=C.
      //
      process_env are (arp);
      process_env rle (rlp);

      // For now let's assume that all the platforms other than Windows
      // recognize LC_ALL.
      //
#ifndef _WIN32
      const char* evars[] = {"LC_ALL=C", nullptr};

      are.vars = evars;
      rle.vars = evars;
#endif

      // Binutils, LLVM, and FreeBSD ar/ranlib all recognize the --version
      // option. While Microsoft's lib.exe doesn't support --version, it only
      // issues a warning and exits with zero status, printing its usual
      // banner before that (running lib.exe without any options results in
      // non-zero exit status -- go figure). So we are going to start with
      // that.
      //
      // LLVM's llvm-lib.exe is similar to the Microsoft's version except it
      // does not print any banners (it does print "LLVM Lib" phrase in the /?
      // output). In fact, there doesn't seem to be any way to extract its
      // version (maybe we could run llvm-ar instead -- it seems to be always
      // around).
      //
      // On NetBSD we get:
      //
      // GNU ar (NetBSD Binutils nb1) 2.31.1
      // GNU ranlib (NetBSD Binutils nb1) 2.31.1
      //
      {
        auto f = [&ar] (string& l, bool) -> guess_result
        {
          // Normally GNU binutils ar --version output has a line that starts
          // with "GNU ar" and ends with the version. For example:
          //
          // "GNU ar (GNU Binutils) 2.26"
          // "GNU ar (GNU Binutils for Ubuntu) 2.26.1"
          //
          // However, some embedded toolchain makers customize this stuff in
          // all kinds of ways. For example:
          //
          // "ppc-vle-ar (HighTec Release HDP-v4.6.6.1-bosch-1.3-3c1e3bc) build on 2017-03-23 (GNU Binutils) 2.20"
          // "GNU ar version 2.13 (tricore) using BFD version 2.13 (2008-12-10)"
          //
          // So let's look for "GNU " and be prepared to find junk instead of
          // the version.
          //
          if (l.find ("GNU ") != string::npos)
          {
            semantic_version v (parse_version (l, l.rfind (' ') + 1));
            return guess_result ("gnu", move (l), move (v));
          }

          // LLVM ar --version output has a line that starts with
          // "LLVM version " and ends with the version, for example:
          //
          // "LLVM version 3.5.2"
          // "LLVM version 5.0.0"
          //
          if (l.compare (0, 13, "LLVM version ") == 0)
          {
            semantic_version v (parse_version (l, l.rfind (' ') + 1));
            return guess_result ("llvm", move (l), move (v));
          }

          // FreeBSD ar --version output starts with "BSD ar " followed by
          // the version and some extra information, for example:
          //
          // "BSD ar 1.1.0 - libarchive 3.1.2"
          //
          // We will treat the extra information as the build component.
          //
          if (l.compare (0, 7, "BSD ar ") == 0)
          {
            semantic_version v (parse_version (l, 7));
            return guess_result ("bsd", move (l), move (v));
          }

          // Microsoft lib.exe output starts with "Microsoft (R) " and ends
          // with a four-component version, for example:
          //
          // "Microsoft (R) Library Manager Version 14.00.24215.1"
          // "Microsoft (R) Library Manager Version 14.14.26428.1"
          //
          if (l.compare (0, 14, "Microsoft (R) ") == 0)
          {
            semantic_version v (parse_version (l, l.rfind (' ') + 1));
            return guess_result ("msvc", move (l), move (v));
          }

          // For now we will recognize LLVM lib via its name.
          //
          const string& s (ar.string ());
          size_t s_p (path::traits_type::find_leaf (s));
          size_t s_n (s.size ());

          if (find_stem (s, s_p, s_n, "llvm-lib") != string::npos)
            return guess_result ("msvc-llvm",
                                 "LLVM lib (unknown version)",
                                 semantic_version (0, 0, 0));

          return guess_result ();
        };

        // Suppress all the errors because we may be trying an unsupported
        // option. Note that in case of lib.exe we will hash the warning
        // (yes, it goes to stdout) but that seems harmless.
        //
        sha256 cs;
        arr = run<guess_result> (3, are, "--version", f, false, false, &cs);

        if (!arr.empty ())
          arr.checksum = cs.string ();
      }

      // On Mac OS X (and probably also older BSDs) ar/ranlib doesn't have an
      // option to display version or help. If we run it without any arguments
      // it dumps usage and exits with an error status. So we will have to use
      // that.
      //
      if (arr.empty ())
      {
        auto f = [] (string& l, bool) -> guess_result
        {
          return l.find (" ar ") != string::npos
            ? guess_result ("generic", move (l), semantic_version ())
            : guess_result ();
        };

        // Redirect STDERR to STDOUT and ignore exit status.
        //
        sha256 cs;
        arr = run<guess_result> (3, are, f, false, true, &cs);

        if (!arr.empty ())
        {
          l4 ([&]{trace << "generic ar '" << arr.signature << "'";});
          arr.checksum = cs.string ();
        }
      }

      if (arr.empty ())
        fail << "unable to guess " << ar << " signature";

      // Now repeat pretty much the same steps for ranlib if requested. We
      // don't bother with the version assuming it is the same as for ar.
      //
      if (rl != nullptr)
      {
        // Binutils, LLVM, and FreeBSD.
        //
        {
          auto f = [] (string& l, bool) -> guess_result
          {
            // The same story as with ar: normally starts with "GNU ranlib "
            // but can vary.
            //
            if (l.find ("GNU ") != string::npos)
              return guess_result ("gnu", move (l), semantic_version ());

            // "LLVM version ".
            //
            if (l.compare (0, 13, "LLVM version ") == 0)
              return guess_result ("llvm", move (l), semantic_version ());

            // On FreeBSD we get "ranlib" rather than "BSD ranlib" for some
            // reason. Which means we can't really call it 'bsd' for sure.
            //
            //if (l.compare (0, 7, "ranlib ") == 0)
            //  return guess_result ("bsd", move (l), semantic_version ());

            return guess_result ();
          };

          sha256 cs;
          rlr = run<guess_result> (3, rle, "--version", f, false, false, &cs);

          if (!rlr.empty ())
            rlr.checksum = cs.string ();
        }

        // Mac OS X (and probably also older BSDs).
        //
        if (rlr.empty ())
        {
          auto f = [] (string& l, bool) -> guess_result
          {
            return l.find ("ranlib") != string::npos
              ? guess_result ("generic", move (l), semantic_version ())
              : guess_result ();
          };

          // Redirect STDERR to STDOUT and ignore exit status.
          //
          sha256 cs;
          rlr = run<guess_result> (3, rle, f, false, true, &cs);

          if (!rlr.empty ())
          {
            l4 ([&]{trace << "generic ranlib '" << rlr.signature << "'";});
            rlr.checksum = cs.string ();
          }
        }

        if (rlr.empty ())
          fail << "unable to guess " << *rl << " signature";
      }

      // None of the ar/ranlib implementations we recognize seem to use
      // environment variables (not even Microsoft lib.exe).
      //
      // Note that dereferencing arr.version is safe since every ar match
      // above constructs the result with a (possibly 0) version.
      //
      return ar_cache.insert (move (key),
                              ar_info {
                                move (arp),
                                move (arr.id),
                                move (arr.signature),
                                move (arr.checksum),
                                move (*arr.version),
                                nullptr,
                                move (rlp),
                                move (rlr.id),
                                move (rlr.signature),
                                move (rlr.checksum),
                                nullptr});
    }

    // Linker environment variables (see also the cc module which duplicates
    // some of these).
//
    // Notes:
    //
    // - GNU linkers search in LD_LIBRARY_PATH in addition to LD_RUN_PATH but
    //   we assume the former is part of the built-in list. Interestingly,
    //   LLD does not search in either.
    //
    // - The LLD family of linkers have a bunch of undocumented, debugging-
    //   related variables (LLD_REPRODUCE, LLD_VERSION, LLD_IN_TEST) that we
    //   ignore.
    //
    // - ld64 uses a ton of environment variables (according to the source
    //   code) but none of them are documented in the man pages. So someone
    //   will need to figure out what's important (some of them are clearly
    //   for debugging of ld itself).
    //
    // See also the note on environment and caching below if adding any new
    // variables.
    //
    static const char* gnu_ld_env[] = {
      "LD_RUN_PATH", "GNUTARGET", "LDEMULATION", "COLLECT_NO_DEMANGLE", nullptr};

    static const char* msvc_ld_env[] = {
      "LIB", "LINK", "_LINK_", nullptr};

    // Extracting ld information requires running it which can become
    // expensive if done repeatedly. So we cache the result.
    //
    static global_cache<ld_info> ld_cache;

    // Guess the linker signature by running it with --version, then -v, and
    // finally -version and matching the output against known banners. The
    // program is searched for first in paths, if not NULL. The result is
    // cached with the key derived from the linker path and the search paths.
    // Issue diagnostics and fail if unable to guess.
    //
    const ld_info&
    guess_ld (const path& ld, const char* paths)
    {
      tracer trace ("bin::guess_ld");

      // First check the cache.
      //
      // Note that none of the information that we cache can be affected by
      // the environment.
      //
      string key;
      {
        sha256 cs;
        cs.append (ld.string ());
        if (paths != nullptr) cs.append (paths);
        key = cs.string ();

        if (const ld_info* r = ld_cache.find (key))
          return *r;
      }

      guess_result r;

      process_path pp (search (ld, paths, "config.bin.ld"));

      // We should probably assume the utility output language words can be
      // translated and even rearranged. Thus pass LC_ALL=C.
      //
      process_env env (pp);

      // For now let's assume that all the platforms other than Windows
      // recognize LC_ALL.
      //
#ifndef _WIN32
      const char* evars[] = {"LC_ALL=C", nullptr};
      env.vars = evars;
#endif

      // Binutils ld recognizes the --version option. Microsoft's link.exe
      // doesn't support --version (nor any other way to get the version
      // without the error exit status) but it will still print its banner.
      // We also want to recognize link.exe as fast as possible since it will
      // be the most commonly configured linker (for other platforms the
      // linker will normally be used indirectly via the compiler and the
      // bin.ld module won't be loaded). So we are going to ignore the error
      // exit status. Our signatures are fairly specific to avoid any kind of
      // false positives.
      //
      // When it comes to LLD, ld.lld (Unix), lld-link (Windows), and wasm-ld
      // (WebAssembly) all recognize --version while ld64.lld (Mac OS) does
      // not (and not even -v per Apple ld64; LLVM bug #43721).
      //
      // Version extraction is a @@ TODO.
      //
      {
        auto f = [&ld] (string& l, bool) -> guess_result
        {
          string id;
          optional<semantic_version> ver;

          // Microsoft link.exe output starts with "Microsoft (R) ".
          //
          if (l.compare (0, 14, "Microsoft (R) ") == 0)
          {
            id = "msvc";
          }
          // LLD prints a line in the form "LLD X.Y.Z ...".
          //
          else if (l.compare (0, 4, "LLD ") == 0)
          {
            ver = parse_version (l, 4);

            // The only way to distinguish between various LLD drivers is via
            // their name. Handle potential prefixes (say a target) and
            // suffixes (say a version).
            //
            // Note that if none of the names match, the id stays empty and
            // this line is treated as not recognized.
            //
            const string& s (ld.string ());
            size_t s_p (path::traits_type::find_leaf (s));
            size_t s_n (s.size ());

            if (find_stem (s, s_p, s_n, "ld.lld" ) != string::npos)
              id = "gnu-lld";
            else if (find_stem (s, s_p, s_n, "lld-link") != string::npos)
              id = "msvc-lld";
            else if (find_stem (s, s_p, s_n, "ld64.lld") != string::npos)
              id = "ld64-lld";
            else if (find_stem (s, s_p, s_n, "wasm-ld" ) != string::npos)
              id = "wasm-lld";
          }
          // Binutils ld.bfd --version output has a line that starts with "GNU
          // ld " while ld.gold -- "GNU gold". Again, fortify it against
          // embedded toolchain customizations by searching for "GNU " in the
          // former case (note that ld.lld mentions "GNU").
          //
          else if (l.compare (0, 9, "GNU gold ") == 0)
          {
            id = "gnu-gold";
          }
          else if (l.find ("GNU ") != string::npos)
          {
            id = "gnu";
          }

          return (id.empty ()
                  ? guess_result ()
                  : guess_result (move (id), move (l), move (ver)));
        };

        // Redirect STDERR to STDOUT and ignore exit status. Note that in case
        // of link.exe we will hash the diagnostics (yes, it goes to stdout)
        // but that seems harmless.
        //
        sha256 cs;
        r = run<guess_result> (3, env, "--version", f, false, true, &cs);

        if (!r.empty ())
          r.checksum = cs.string ();
      }

      // Next try -v which will cover Apple's linkers.
      //
      if (r.empty ())
      {
        auto f = [] (string& l, bool) -> guess_result
        {
          // New ld64 has "PROJECT:ld64" in the first line (output to stderr),
          // for example:
          //
          // @(#)PROGRAM:ld PROJECT:ld64-242.2
          //
          if (l.find ("PROJECT:ld64") != string::npos)
            return guess_result ("ld64", move (l), semantic_version ());

          // Old ld has "cctools" in the first line, for example:
          //
          // Apple Computer, Inc. version cctools-622.9~2
          //
          if (l.find ("cctools") != string::npos)
            return guess_result ("cctools", move (l), semantic_version ());

          return guess_result ();
        };

        sha256 cs;
        r = run<guess_result> (3, env, "-v", f, false, false, &cs);

        if (!r.empty ())
          r.checksum = cs.string ();
      }

      // Finally try -version which will take care of older LLVM's lld.
      //
      if (r.empty ())
      {
        auto f = [] (string& l, bool) -> guess_result
        {
          // Unlike other LLVM tools (e.g., ar), the lld's version is printed
          // (to stderr) as:
          //
          // LLVM Linker Version: 3.7
          //
          if (l.compare (0, 19, "LLVM Linker Version") == 0)
            return guess_result ("gnu-lld", move (l), semantic_version ());

          return guess_result ();
        };

        // Suppress all the errors because we may be trying an unsupported
        // option.
        //
        sha256 cs;
        r = run<guess_result> (3, env, "-version", f, false, false, &cs);

        if (!r.empty ())
          r.checksum = cs.string ();
      }

      if (r.empty ())
        fail << "unable to guess " << ld << " signature";

      // Pick the list of environment variables based on the linker id (none
      // for the ids not listed here).
      //
      const char* const* ld_env ((r.id == "gnu" ||
                                  r.id == "gnu-gold") ? gnu_ld_env :
                                 (r.id == "msvc" ||
                                  r.id == "msvc-lld") ? msvc_ld_env :
                                 nullptr);

      return ld_cache.insert (move (key),
                              ld_info {
                                move (pp),
                                move (r.id),
                                move (r.signature),
                                move (r.checksum),
                                move (r.version),
                                ld_env});
    }

    // Resource compiler environment variables.
    //
    // See also the note on environment and caching below if adding any new
    // variables.
    //
    static const char* msvc_rc_env[] = {"INCLUDE", nullptr};

    // Extracting rc information requires running it which can become
    // expensive if done repeatedly. So we cache the result.
    //
    static global_cache<rc_info> rc_cache;

    // Guess the resource compiler signature by running it with --version and
    // then /? and matching the output against known banners (or, for LLVM
    // rc, the program name). The program is searched for first in paths, if
    // not NULL. The result is cached with the key derived from the program
    // path and the search paths. Issue diagnostics and fail if unable to
    // guess.
    //
    const rc_info&
    guess_rc (const path& rc, const char* paths)
    {
      tracer trace ("bin::guess_rc");

      // First check the cache.
      //
      // Note that none of the information that we cache can be affected by
      // the environment.
      //
      string key;
      {
        sha256 cs;
        cs.append (rc.string ());
        if (paths != nullptr) cs.append (paths);
        key = cs.string ();

        if (const rc_info* r = rc_cache.find (key))
          return *r;
      }

      guess_result r;

      process_path pp (search (rc, paths, "config.bin.rc"));

      // We should probably assume the utility output language words can be
      // translated and even rearranged. Thus pass LC_ALL=C.
      //
      process_env env (pp);

      // For now let's assume that all the platforms other than Windows
      // recognize LC_ALL.
      //
#ifndef _WIN32
      const char* evars[] = {"LC_ALL=C", nullptr};
      env.vars = evars;
#endif

      // Binutils windres recognizes the --version option.
      //
      // Version extraction is a @@ TODO.
      {
        auto f = [] (string& l, bool) -> guess_result
        {
          // Binutils windres --version output has a line that starts with
          // "GNU windres " but search for "GNU ", similar to other tools.
          //
          if (l.find ("GNU ") != string::npos)
            return guess_result ("gnu", move (l), semantic_version ());

          return guess_result ();
        };

        // Suppress all the errors because we may be trying an unsupported
        // option.
        //
        sha256 cs;
        r = run<guess_result> (3, env, "--version", f, false, false, &cs);

        if (!r.empty ())
          r.checksum = cs.string ();
      }

      // Microsoft rc.exe /? prints its standard banner and exits with zero
      // status. LLVM's llvm-rc.exe /? doesn't print any LLVM-identifiable
      // information (unlike llvm-lib.exe) and similarly there doesn't seem to
      // be any way to get its version.
      //
      if (r.empty ())
      {
        auto f = [&rc] (string& l, bool) -> guess_result
        {
          if (l.compare (0, 14, "Microsoft (R) ") == 0)
            return guess_result ("msvc", move (l), semantic_version ());

          // For now we will recognize LLVM rc via its name.
          //
          const string& s (rc.string ());
          size_t s_p (path::traits_type::find_leaf (s));
          size_t s_n (s.size ());

          if (find_stem (s, s_p, s_n, "llvm-rc") != string::npos)
            return guess_result ("msvc-llvm",
                                 "LLVM rc (unknown version)",
                                 semantic_version ());

          return guess_result ();
        };

        sha256 cs;
        r = run<guess_result> (3, env, "/?", f, false, false, &cs);

        if (!r.empty ())
          r.checksum = cs.string ();
      }

      if (r.empty ())
        fail << "unable to guess " << rc << " signature";

      // Only the MSVC-compatible resource compilers get the environment
      // variable list.
      //
      const char* const* rc_env ((r.id == "msvc" ||
                                  r.id == "msvc-llvm") ? msvc_rc_env :
                                 nullptr);

      return rc_cache.insert (move (key),
                              rc_info {
                                move (pp),
                                move (r.id),
                                move (r.signature),
                                move (r.checksum),
                                rc_env});
    }

    // Extracting nm information requires running it which can become
    // expensive if done repeatedly. So we cache the result.
    //
    static global_cache<nm_info> nm_cache;

    // Guess the nm signature by running it with --version and matching the
    // output against known banners. The program is searched for first in
    // paths, if not NULL. The result is cached with the key derived from the
    // program path and the search paths. Unlike the other guess_*()
    // functions, an unrecognized nm is not an error: it is reported as
    // "generic" with empty signature and checksum.
    //
    const nm_info&
    guess_nm (const path& nm, const char* paths)
    {
      tracer trace ("bin::guess_nm");

      // First check the cache.
      //
      // Note that none of the information that we cache can be affected by
      // the environment.
      //
      string key;
      {
        sha256 cs;
        cs.append (nm.string ());
        if (paths != nullptr) cs.append (paths);
        key = cs.string ();

        if (const nm_info* r = nm_cache.find (key))
          return *r;
      }

      guess_result r;

      process_path pp (search (nm, paths, "config.bin.nm"));

      // We should probably assume the utility output language words can be
      // translated and even rearranged. Thus pass LC_ALL=C.
      //
      process_env env (pp);

      // For now let's assume that all the platforms other than Windows
      // recognize LC_ALL.
      //
#ifndef _WIN32
      const char* evars[] = {"LC_ALL=C", nullptr};
      env.vars = evars;
#endif

      // Both GNU Binutils and LLVM nm recognize the --version option.
      //
      // Microsoft dumpbin.exe does not recognize --version but will still
      // issue its standard banner (and even exit with zero status).
      //
      // FreeBSD uses nm from ELF Toolchain which recognizes --version.
      //
      // Mac OS X nm doesn't have an option to display version or help. If we
      // run it without any arguments, then it looks for a.out. So there
      // doesn't seem to be a way to detect it.
      //
      // Version extraction is a @@ TODO.
      {
        auto f = [] (string& l, bool) -> guess_result
        {
          // Binutils nm --version output first line starts with "GNU nm" but
          // search for "GNU ", similar to other tools.
          //
          if (l.find ("GNU ") != string::npos)
            return guess_result ("gnu", move (l), semantic_version ());

          // LLVM nm --version output has a line that starts with
          // "LLVM version" followed by a version.
          //
          if (l.compare (0, 13, "LLVM version ") == 0)
            return guess_result ("llvm", move (l), semantic_version ());

          // Microsoft dumpbin.exe banner starts with "Microsoft (R) ".
          //
          if (l.compare (0, 14, "Microsoft (R) ") == 0)
            return guess_result ("msvc", move (l), semantic_version ());

          // nm --version from ELF Toolchain prints:
          //
          // nm (elftoolchain r3477M)
          //
          if (l.find ("elftoolchain") != string::npos)
            return guess_result ("elftoolchain", move (l), semantic_version ());

          return guess_result ();
        };

        // Suppress all the errors because we may be trying an unsupported
        // option.
        //
        sha256 cs;
        r = run<guess_result> (3, env, "--version", f, false, false, &cs);

        if (!r.empty ())
          r.checksum = cs.string ();
      }

      // Since there are some unrecognizable nm's (e.g., on Mac OS X), we will
      // have to assume generic if we managed to find the executable.
      //
      if (r.empty ())
        r = guess_result ("generic", "", semantic_version ());

      return nm_cache.insert (move (key),
                              nm_info {
                                move (pp),
                                move (r.id),
                                move (r.signature),
                                move (r.checksum),
                                nullptr /* environment */});
    }
  }
}