aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2022-01-17 10:45:06 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2022-01-17 10:45:06 +0200
commit ef6a70de5d0bc252380ebcb21eb84913473ead5d (patch)
tree bb27438ea5a0afa69f6db5a9b55b367972dec63e
parent 54c56af1067df562c232cb73b9a1b85f67d9bbf1 (diff)
Use UTF-8 as default input/source charset for C/C++ compilation
-rw-r--r-- NEWS 13
-rw-r--r-- bootstrap-clang.bat 2
-rw-r--r-- bootstrap-mingw.bat 2
-rw-r--r-- bootstrap-msvc.bat 2
-rw-r--r-- bootstrap.gmake 6
-rwxr-xr-x bootstrap.sh 2
-rw-r--r-- build/root.build 2
-rw-r--r-- libbuild2/cc/compile-rule.cxx 116
8 files changed, 110 insertions, 35 deletions
diff --git a/NEWS b/NEWS
index a0c8694..d2c0090 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,18 @@
Version 0.15.0
+ * UTF-8 is now the default input/source character set for C/C++ compilation.
+
+ Specifically, the cc module now passes the appropriate compiler option
+ (/utf-8 for MSVC and -finput-charset=UTF-8 for GCC and Clang) unless a
+ custom value is already specified (with /{source,execution}-charset for
+ MSVC and -finput-charset for GCC and Clang).
+
+ This change may trigger new compilation errors in your source code if
+ it's not valid UTF-8 (such errors most commonly point into comments).
+ For various ways to fix this, see:
+
+ https://github.com/build2/HOWTO/blob/master/entries/convert-source-files-to-utf8.md
+
* Support for dynamic dependencies in ad hoc recipes.
Specifically, the `depdb` builtin now has the new `dyndep` command that
diff --git a/bootstrap-clang.bat b/bootstrap-clang.bat
index 00302e9..5a06a70 100644
--- a/bootstrap-clang.bat
+++ b/bootstrap-clang.bat
@@ -88,7 +88,7 @@ rem worked around by passing an obscure internal option. Clang 9 doesn't
rem have this problem. To keep things simple, let's just suppress this
rem warning.
rem
-set "ops=-m64 -std=c++1y -D_MT -D_CRT_SECURE_NO_WARNINGS -Xlinker /ignore:4217"
+set "ops=-finput-charset=UTF-8 -m64 -std=c++1y -D_MT -D_CRT_SECURE_NO_WARNINGS -Xlinker /ignore:4217"
:ops_next
shift
if "_%1_" == "__" (
diff --git a/bootstrap-mingw.bat b/bootstrap-mingw.bat
index df7e677..cfd9d7c 100644
--- a/bootstrap-mingw.bat
+++ b/bootstrap-mingw.bat
@@ -83,7 +83,7 @@ rem
rem Note that for as long as we support GCC 4.9 we have to compile in the
rem C++14 mode since 4.9 doesn't recognize c++1z.
rem
-set "ops=-std=c++1y"
+set "ops=-finput-charset=UTF-8 -std=c++1y"
:ops_next
shift
if "_%1_" == "__" (
diff --git a/bootstrap-msvc.bat b/bootstrap-msvc.bat
index 3d74427..6a6fcbc 100644
--- a/bootstrap-msvc.bat
+++ b/bootstrap-msvc.bat
@@ -111,7 +111,7 @@ set "src=%src% %libbutl%\libbutl"
rem Get the compile options.
rem
-set "ops=/nologo /EHsc /MT /MP"
+set "ops=/nologo /utf-8 /EHsc /MT /MP"
:ops_next
shift
if "_%1_" == "__" (
diff --git a/bootstrap.gmake b/bootstrap.gmake
index 1e0e8e2..e5ab285 100644
--- a/bootstrap.gmake
+++ b/bootstrap.gmake
@@ -190,13 +190,13 @@ $(out_root)/build2/b-boot$(exe): $(build2_obj) $(libbuild2_obj) $(libbutl_obj)
$(CXX) -std=c++1y $(CXXFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
$(out_root)/build2/%.b.o: $(src_root)/build2/%.cxx | $$(dir $$@).
- $(CXX) -I$(libbutl) -I$(src_root) -DBUILD2_BOOTSTRAP -DBUILD2_HOST_TRIPLET=\"$(chost)\" $(CPPFLAGS) -std=c++1y $(CXXFLAGS) -o $@ -c $<
+ $(CXX) -I$(libbutl) -I$(src_root) -DBUILD2_BOOTSTRAP -DBUILD2_HOST_TRIPLET=\"$(chost)\" $(CPPFLAGS) -finput-charset=UTF-8 -std=c++1y $(CXXFLAGS) -o $@ -c $<
$(out_root)/libbuild2/%.b.o: $(src_root)/libbuild2/%.cxx | $$(dir $$@).
- $(CXX) -I$(libbutl) -I$(src_root) -DBUILD2_BOOTSTRAP -DBUILD2_HOST_TRIPLET=\"$(chost)\" $(CPPFLAGS) -std=c++1y $(CXXFLAGS) -o $@ -c $<
+ $(CXX) -I$(libbutl) -I$(src_root) -DBUILD2_BOOTSTRAP -DBUILD2_HOST_TRIPLET=\"$(chost)\" $(CPPFLAGS) -finput-charset=UTF-8 -std=c++1y $(CXXFLAGS) -o $@ -c $<
$(libbutl_out)/%.b.o: $(libbutl)/libbutl/%.cxx | $$(dir $$@).
- $(CXX) -I$(libbutl) -DBUILD2_BOOTSTRAP $(CPPFLAGS) -std=c++1y $(CXXFLAGS) -o $@ -c $<
+ $(CXX) -I$(libbutl) -DBUILD2_BOOTSTRAP $(CPPFLAGS) -finput-charset=UTF-8 -std=c++1y $(CXXFLAGS) -o $@ -c $<
.PRECIOUS: %/.
%/. :
diff --git a/bootstrap.sh b/bootstrap.sh
index 14e52cf..9bd13b4 100755
--- a/bootstrap.sh
+++ b/bootstrap.sh
@@ -147,4 +147,4 @@ done
# mode since 4.9 doesn't recognize c++1z.
#
set -x
-"$cxx" "-I$libbutl" -I. -DBUILD2_BOOTSTRAP '-DBUILD2_HOST_TRIPLET="'"$host"'"' -std=c++1y "$@" -o build2/b-boot $r -lpthread
+"$cxx" "-I$libbutl" -I. -DBUILD2_BOOTSTRAP '-DBUILD2_HOST_TRIPLET="'"$host"'"' -finput-charset=UTF-8 -std=c++1y "$@" -o build2/b-boot $r -lpthread
diff --git a/build/root.build b/build/root.build
index 4925c19..3afdcf9 100644
--- a/build/root.build
+++ b/build/root.build
@@ -20,7 +20,7 @@ if ($cxx.target.system == 'win32-msvc')
cxx.poptions += -D_CRT_SECURE_NO_WARNINGS -D_SCL_SECURE_NO_WARNINGS
if ($cxx.class == 'msvc')
- cxx.coptions += /wd4251 /wd4275 /wd4800 /wd4819
+ cxx.coptions += /wd4251 /wd4275 /wd4800
elif ($cxx.id == 'gcc')
cxx.coptions += -Wno-maybe-uninitialized -Wno-free-nonheap-object \
-Wno-stringop-overread # libbutl
diff --git a/libbuild2/cc/compile-rule.cxx b/libbuild2/cc/compile-rule.cxx
index 29b32c6..77d01c6 100644
--- a/libbuild2/cc/compile-rule.cxx
+++ b/libbuild2/cc/compile-rule.cxx
@@ -250,7 +250,7 @@ namespace build2
compile_rule::
compile_rule (data&& d)
: common (move (d)),
- rule_id (string (x) += ".compile 5")
+ rule_id (string (x) += ".compile 6")
{
static_assert (sizeof (match_data) <= target::data_size,
"insufficient space");
@@ -1149,15 +1149,6 @@ namespace build2
append_options (cs, t, c_coptions);
append_options (cs, t, x_coptions);
-
- if (ot == otype::s)
- {
- // On Darwin, Win32 -fPIC is the default.
- //
- if (tclass == "linux" || tclass == "bsd")
- cs.append ("-fPIC");
- }
-
append_options (cs, cmode);
if (md.pp != preprocessed::all)
@@ -3015,8 +3006,8 @@ namespace build2
//
// So seeing that it is hard to trigger a legitimate VC preprocessor
// warning, for now, we will just treat them as errors by adding /WX.
- // BTW, another example of a plausible preprocessor warning is C4819
- // (character unrepresentable in source charset).
+ // BTW, other examples of plausible preprocessor warnings are C4819
+ // and C4828 (character unrepresentable in source charset).
//
// Finally, if we are using the module mapper, then all this mess falls
// away: we only run the compiler once, we let the diagnostics through,
@@ -3263,8 +3254,24 @@ namespace build2
append_options (args, cmode);
append_sys_hdr_options (args); // Extra system header dirs (last).
- // See perform_update() for details on /external:W0, /EHsc, /MD.
+ // See perform_update() for details on the choice of options.
//
+ {
+ bool sc (find_option_prefix ("/source-charset:", args));
+ bool ec (find_option_prefix ("/execution-charset:", args));
+
+ if (!sc && !ec)
+ args.push_back ("/utf-8");
+ else
+ {
+ if (!sc)
+ args.push_back ("/source-charset:UTF-8");
+
+ if (!ec)
+ args.push_back ("/execution-charset:UTF-8");
+ }
+ }
+
if (cvariant != "clang" && isystem (*this))
{
if (find_option_prefix ("/external:I", args) &&
@@ -3305,8 +3312,15 @@ namespace build2
}
case compiler_class::gcc:
{
+ append_options (args, cmode,
+ cmode.size () - (modules && clang ? 1 : 0));
+ append_sys_hdr_options (args); // Extra system header dirs (last).
+
// See perform_update() for details on the choice of options.
//
+ if (!find_option_prefix ("-finput-charset=", args))
+ args.push_back ("-finput-charset=UTF-8");
+
if (ot == otype::s)
{
if (tclass == "linux" || tclass == "bsd")
@@ -3335,10 +3349,6 @@ namespace build2
}
}
- append_options (args, cmode,
- cmode.size () - (modules && clang ? 1 : 0));
- append_sys_hdr_options (args); // Extra system header dirs (last).
-
// Setup the dynamic module mapper if needed.
//
// Note that it's plausible in the future we will use it even if
@@ -4609,8 +4619,24 @@ namespace build2
append_options (args, cmode);
append_sys_hdr_options (args);
- // See perform_update() for details on /external:W0, /EHsc, /MD.
+ // See perform_update() for details on the choice of options.
//
+ {
+ bool sc (find_option_prefix ("/source-charset:", args));
+ bool ec (find_option_prefix ("/execution-charset:", args));
+
+ if (!sc && !ec)
+ args.push_back ("/utf-8");
+ else
+ {
+ if (!sc)
+ args.push_back ("/source-charset:UTF-8");
+
+ if (!ec)
+ args.push_back ("/execution-charset:UTF-8");
+ }
+ }
+
if (cvariant != "clang" && isystem (*this))
{
if (find_option_prefix ("/external:I", args) &&
@@ -4635,6 +4661,15 @@ namespace build2
}
case compiler_class::gcc:
{
+ append_options (args, cmode,
+ cmode.size () - (modules && clang ? 1 : 0));
+ append_sys_hdr_options (args);
+
+ // See perform_update() for details on the choice of options.
+ //
+ if (!find_option_prefix ("-finput-charset=", args))
+ args.push_back ("-finput-charset=UTF-8");
+
if (ot == otype::s)
{
if (tclass == "linux" || tclass == "bsd")
@@ -4663,10 +4698,6 @@ namespace build2
}
}
- append_options (args, cmode,
- cmode.size () - (modules && clang ? 1 : 0));
- append_sys_hdr_options (args);
-
args.push_back ("-E");
append_lang_options (args, md);
@@ -6518,6 +6549,27 @@ namespace build2
if (md.pp != preprocessed::all)
append_sys_hdr_options (args); // Extra system header dirs (last).
+ // Set source/execution charsets to UTF-8 unless a custom charset
+ // is specified.
+ //
+ // Note that clang-cl supports /utf-8 and /*-charset.
+ //
+ {
+ bool sc (find_option_prefix ("/source-charset:", args));
+ bool ec (find_option_prefix ("/execution-charset:", args));
+
+ if (!sc && !ec)
+ args.push_back ("/utf-8");
+ else
+ {
+ if (!sc)
+ args.push_back ("/source-charset:UTF-8");
+
+ if (!ec)
+ args.push_back ("/execution-charset:UTF-8");
+ }
+ }
+
// If we have any /external:I options but no /external:Wn, then add
// /external:W0 to emulate the -isystem semantics.
//
@@ -6631,6 +6683,21 @@ namespace build2
}
case compiler_class::gcc:
{
+ append_options (args, cmode);
+
+ if (md.pp != preprocessed::all)
+ append_sys_hdr_options (args); // Extra system header dirs (last).
+
+ // Set the input charset to UTF-8 unless a custom one is specified.
+ //
+ // Note that the execution charset (-fexec-charset) is UTF-8 by
+ // default.
+ //
+ // Note that early versions of Clang only recognize uppercase UTF-8.
+ //
+ if (!find_option_prefix ("-finput-charset=", args))
+ args.push_back ("-finput-charset=UTF-8");
+
if (ot == otype::s)
{
// On Darwin, Win32 -fPIC is the default.
@@ -6734,11 +6801,6 @@ namespace build2
}
}
- append_options (args, cmode);
-
- if (md.pp != preprocessed::all)
- append_sys_hdr_options (args); // Extra system header dirs (last).
-
append_header_options (env, args, header_args, a, t, md, md.dd);
append_module_options (env, args, module_args, a, t, md, md.dd);