diff options
author | Matthew Krupcale <mkrupcale@matthewkrupcale.com> | 2020-08-09 09:45:15 -0400 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2020-08-12 14:03:10 +0200 |
commit | d7571f706928dd023097540dfc88448cf80fdcdb (patch) | |
tree | 5e83dada2725ddf08c47f74334b2eebcf92854d3 /libbuild2/cc | |
parent | ca2283bdc00529967f9ccd9bf0097714447af351 (diff) |
Adjust linker arguments for LTO parallelization
GCC 10+ and Clang 4+ support controlling the number of LTO threads/jobs used
during linking. Use the build2 scheduler to allocate up to the number of
hardware threads to the GCC or Clang linker processes when -flto=auto or
-flto=thin is specified, respectively. Otherwise, each GCC or Clang linker
process will attempt to spawn as many threads as there are hardware threads,
which could result in up to n^2 linker threads on a CPU with n hardware threads.
Diffstat (limited to 'libbuild2/cc')
-rw-r--r-- | libbuild2/cc/link-rule.cxx | 54 |
1 file changed, 54 insertions, 0 deletions
```diff
diff --git a/libbuild2/cc/link-rule.cxx b/libbuild2/cc/link-rule.cxx
index e94f538..b0457e7 100644
--- a/libbuild2/cc/link-rule.cxx
+++ b/libbuild2/cc/link-rule.cxx
@@ -34,6 +34,7 @@ namespace build2
   namespace cc
   {
     using namespace bin;
+    using build2::to_string;

     link_rule::
     link_rule (data&& d)
@@ -2996,6 +2997,58 @@ namespace build2
       else if (verb == 2)
         print_process (args);

+      // Adjust linker parallelism.
+      //
+      string jobs_arg;
+      scheduler::alloc_guard jobs_extra;
+
+      if (!lt.static_library ())
+      {
+        switch (ctype)
+        {
+        case compiler_type::gcc:
+          {
+            // Rewrite -flto=auto (available since GCC 10).
+            //
+            // By default GCC 10 splits the optimization into 128 units.
+            //
+            if (cmaj < 10)
+              break;
+
+            auto i (find_option_prefix ("-flto", args.rbegin (), args.rend ()));
+            if (i != args.rend () && strcmp (*i, "-flto=auto") == 0)
+            {
+              jobs_extra = scheduler::alloc_guard (ctx.sched, 0);
+              jobs_arg = "-flto=" + to_string (1 + jobs_extra.n);
+              *i = jobs_arg.c_str ();
+            }
+            break;
+          }
+        case compiler_type::clang:
+          {
+            // If we have -flto=thin and no explicit -flto-jobs=N (available
+            // since Clang 4), then add our own -flto-jobs value.
+            //
+            if (cmaj < 4)
+              break;
+
+            auto i (find_option_prefix ("-flto", args.rbegin (), args.rend ()));
+            if (i != args.rend ()                &&
+                strcmp (*i, "-flto=thin") == 0   &&
+                !find_option_prefix ("-flto-jobs=", args))
+            {
+              jobs_extra = scheduler::alloc_guard (ctx.sched, 0);
+              jobs_arg = "-flto-jobs=" + to_string (1 + jobs_extra.n);
+              args.insert (i.base (), jobs_arg.c_str ()); // After -flto=thin.
+            }
+            break;
+          }
+        case compiler_type::msvc:
+        case compiler_type::icc:
+          break;
+        }
+      }
+
       // Do any necessary fixups to the command line to make it runnable.
       //
       // Notice the split in the diagnostics: at verbosity level 1 we print
@@ -3162,6 +3215,7 @@ namespace build2
         }

         run_finish (args, pr);
+        jobs_extra.deallocate ();
       }
       catch (const process_error& e)
       {
```