From d7571f706928dd023097540dfc88448cf80fdcdb Mon Sep 17 00:00:00 2001 From: Matthew Krupcale Date: Sun, 9 Aug 2020 09:45:15 -0400 Subject: Adjust linker arguments for LTO parallelization GCC 10+ and Clang 4+ support controlling the number of LTO threads/jobs used during linking. Use the build2 scheduler to allocate up to the number of hardware threads to the GCC or Clang linker processes when -flto=auto or -flto=thin is specified, respectively. Otherwise, GCC or Clang will attempt to spawn the number of hardware threads detected for each linker process, which could result in up to n^2 linker threads on a CPU with n hardware threads. --- libbuild2/cc/link-rule.cxx | 54 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'libbuild2') diff --git a/libbuild2/cc/link-rule.cxx b/libbuild2/cc/link-rule.cxx index e94f538..b0457e7 100644 --- a/libbuild2/cc/link-rule.cxx +++ b/libbuild2/cc/link-rule.cxx @@ -34,6 +34,7 @@ namespace build2 namespace cc { using namespace bin; + using build2::to_string; link_rule:: link_rule (data&& d) @@ -2996,6 +2997,58 @@ namespace build2 else if (verb == 2) print_process (args); + // Adjust linker parallelism. + // + string jobs_arg; + scheduler::alloc_guard jobs_extra; + + if (!lt.static_library ()) + { + switch (ctype) + { + case compiler_type::gcc: + { + // Rewrite -flto=auto (available since GCC 10). + // + // By default GCC 10 splits the optimization into 128 units. + // + if (cmaj < 10) + break; + + auto i (find_option_prefix ("-flto", args.rbegin (), args.rend ())); + if (i != args.rend () && strcmp (*i, "-flto=auto") == 0) + { + jobs_extra = scheduler::alloc_guard (ctx.sched, 0); + jobs_arg = "-flto=" + to_string (1 + jobs_extra.n); + *i = jobs_arg.c_str (); + } + break; + } + case compiler_type::clang: + { + // If we have -flto=thin and no explicit -flto-jobs=N (available + // since Clang 4), then add our own -flto-jobs value. + // + if (cmaj < 4) + break; + + auto i (find_option_prefix ("-flto", args.rbegin (), args.rend ())); + if (i != args.rend () && + strcmp (*i, "-flto=thin") == 0 && + !find_option_prefix ("-flto-jobs=", args)) + { + jobs_extra = scheduler::alloc_guard (ctx.sched, 0); + jobs_arg = "-flto-jobs=" + to_string (1 + jobs_extra.n); + args.insert (i.base (), jobs_arg.c_str ()); // After -flto=thin. + } + break; + } + case compiler_type::msvc: + case compiler_type::icc: + break; + } + } + // Do any necessary fixups to the command line to make it runnable. // // Notice the split in the diagnostics: at verbosity level 1 we print @@ -3162,6 +3215,7 @@ namespace build2 } run_finish (args, pr); + jobs_extra.deallocate (); } catch (const process_error& e) { -- cgit v1.1