diff --git a/src/BUILD b/src/BUILD
index e80014d9796e96..95cd961d3064a9 100644
--- a/src/BUILD
+++ b/src/BUILD
@@ -180,16 +180,22 @@ filegroup(
visibility = ["//src/test/shell/bazel:__pkg__"],
)
-# This program patches the app manifest of the java.exe launcher to force its
-# active code page to UTF-8 on Windows 1903 and later, which is required for
-# proper support of Unicode characters outside the system code page.
-# The JDK currently (as of JDK 23) doesn't support this natively:
-# https://mail.openjdk.org/pipermail/core-libs-dev/2024-November/133773.html
+# Prints the app manifest embedded in a Windows executable to stdout.
+# Only compatible with Windows and, being tagged "manual", only built when
+# requested explicitly or as a dependency of another target.
cc_binary(
- name = "patch_java_manifest_for_utf8",
- srcs = ["patch_java_manifest_for_utf8.cc"],
+ name = "read_manifest",
+ srcs = ["read_manifest.cc"],
tags = ["manual"],
target_compatible_with = ["@platforms//os:windows"],
+ visibility = ["//src/java_tools:__subpackages__"],
+)
+
+# Replaces the app manifest of a Windows executable with the manifest read
+# from stdin. The executable is modified in place.
+cc_binary(
+ name = "write_manifest",
+ srcs = ["write_manifest.cc"],
+ tags = ["manual"],
+ target_compatible_with = ["@platforms//os:windows"],
+ visibility = ["//src/java_tools:__subpackages__"],
)
sh_binary(
@@ -197,7 +203,8 @@ sh_binary(
srcs = ["minimize_jdk.sh"],
data = select({
"@platforms//os:windows": [
- ":patch_java_manifest_for_utf8",
+ ":read_manifest",
+ ":write_manifest",
],
"//conditions:default": [],
}),
diff --git a/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/BUILD b/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/BUILD
index 4abefcb4b2b8d9..27e0d9588ce90a 100644
--- a/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/BUILD
+++ b/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/BUILD
@@ -1,5 +1,5 @@
load("@rules_graalvm//graalvm:defs.bzl", "native_image")
-load("@rules_java//java:defs.bzl", "java_binary")
+load("@rules_java//java:defs.bzl", "java_binary", "java_library")
package(
default_applicable_licenses = ["//:license"],
@@ -26,13 +26,21 @@ java_binary(
runtime_deps = [":turbine_deps"],
)
-native_image(
+# Stable label for the Turbine native image: resolves to the binary with the
+# app manifest patched in on Windows and to the plain native image on all
+# other platforms.
+alias(
+ name = "turbine_direct_graal",
+ actual = select({
+ "@platforms//os:windows": ":turbine_direct_graal_with_app_manifest",
+ "//conditions:default": ":turbine_direct_graal_unpatched",
+ }),
+)
+
+native_image(
+ name = "turbine_direct_graal_unpatched",
executable_name = select({
# TODO(cushon): restore .exe suffix on windows
# see https://github.com/sgammon/rules_graalvm/issues/324
- "@bazel_tools//src/conditions:windows": "%target%",
- "//conditions:default": "%target%",
+ "@bazel_tools//src/conditions:windows": "turbine_direct_graal_unpatched",
+ "//conditions:default": "turbine_direct_graal",
}),
extra_args = [
# Workaround for https://github.com/oracle/graal/issues/4757.
@@ -68,6 +76,26 @@ native_image(
deps = [":turbine_deps"],
)
+# On Windows, embed an app manifest in the binary that forces it to run with a
+# UTF-8 active code page. The native image is already built with a UTF-8 code
+# page, but without the app manifest it will not be able to use UTF-8 for
+# filesystem operations at runtime.
+# https://github.com/oracle/graal/issues/10237
+#
+# The unpatched binary is copied to the output and made writable first; then
+# write_manifest patches the copy in place with the manifest fed on stdin.
+genrule(
+ name = "turbine_direct_graal_with_app_manifest",
+ srcs = [
+ ":turbine_direct_graal_unpatched",
+ "turbine_direct_graal.manifest",
+ ],
+ outs = ["turbine_direct_graal.exe"],
+ cmd = """\
+cp $(location :turbine_direct_graal_unpatched) $@
+chmod +w $@
+cat $(location turbine_direct_graal.manifest) | $(location //src:write_manifest) $@
+""",
+ target_compatible_with = ["@platforms//os:windows"],
+ tools = ["//src:write_manifest"],
+)
+
# Run with -c opt.
sh_binary(
name = "turbine_benchmark",
diff --git a/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/turbine_direct_graal.manifest b/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/turbine_direct_graal.manifest
new file mode 100644
index 00000000000000..765ea25b7e87ee
--- /dev/null
+++ b/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/turbine_direct_graal.manifest
@@ -0,0 +1,19 @@
+
+
+
+Turbine
+
+
+
+ UTF-8
+
+
+
+
diff --git a/src/minimize_jdk.sh b/src/minimize_jdk.sh
index 4e5d68b6b2e4ec..638a114e416789 100755
--- a/src/minimize_jdk.sh
+++ b/src/minimize_jdk.sh
@@ -61,7 +61,6 @@ fi
UNAME=$(uname -s | tr 'A-Z' 'a-z')
if [[ "$UNAME" =~ msys_nt* ]]; then
- set -x
mkdir "tmp.$$"
cd "tmp.$$"
unzip -q "../$fulljdk"
@@ -74,7 +73,14 @@ if [[ "$UNAME" =~ msys_nt* ]]; then
--vm=server --strip-debug --no-man-pages \
--add-options=' --enable-native-access=ALL-UNNAMED' \
--output reduced
- "$(rlocation "io_bazel/src/patch_java_manifest_for_utf8.exe")" reduced/bin/java.exe
+ # Patch the app manifest of the java.exe launcher to force its active code
+ # page to UTF-8 on Windows 1903 and later, which is required for proper
+ # support of Unicode characters outside the system code page.
+ # The JDK currently (as of JDK 23) doesn't support this natively:
+ # https://mail.openjdk.org/pipermail/core-libs-dev/2024-November/133773.html
+ "$(rlocation io_bazel/src/read_manifest.exe)" reduced/bin/java.exe \
+ | sed 's||UTF-8&|' \
+ | "$(rlocation io_bazel/src/write_manifest.exe)" reduced/bin/java.exe
cp $DOCS legal/java.base/ASSEMBLY_EXCEPTION \
reduced/
# These are necessary for --host_jvm_debug to work.
diff --git a/src/patch_java_manifest_for_utf8.cc b/src/patch_java_manifest_for_utf8.cc
deleted file mode 100644
index cc04c07761abc6..00000000000000
--- a/src/patch_java_manifest_for_utf8.cc
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright 2024 The Bazel Authors. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include
-#include
-
-#define WIN32_LEAN_AND_MEAN
-#include
-
-#include
-
-// This program patches the app manifest of the java.exe launcher to force its
-// active code page to UTF-8 on Windows 1903 and later.
-// https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page#set-a-process-code-page-to-utf-8
-//
-// This is necessary because the launcher sets sun.jnu.encoding to the system
-// code page, which by default is a legacy code page such as Cp1252 on Windows.
-// This causes the JVM to be unable to interact with files whose paths contain
-// Unicode characters not representable in the system code page, as well as
-// command-line arguments and environment variables containing such characters.
-//
-// Usage in the libjava.dll code:
-// https://github.com/openjdk/jdk/blob/e7f0bf11ff0e89b6b156d5e88ca3771c706aa46a/src/java.base/windows/native/libjava/java_props_md.c#L63-L65
-int wmain(int argc, wchar_t *argv[]) {
- if (argc != 2) {
- fwprintf(stderr, L"Usage: %ls \n", argv[0]);
- return 1;
- }
-
- // Read the app manifest (aka side-by-side or fusion manifest) from the
- // executable, which requires loading it as a "module".
- HMODULE exe = LoadLibraryExW(argv[1], nullptr, LOAD_LIBRARY_AS_DATAFILE);
- if (!exe) {
- fwprintf(stderr, L"Error loading file %ls: %d\n", argv[1], GetLastError());
- return 1;
- }
- HRSRC manifest_resource = FindResourceA(exe, MAKEINTRESOURCE(1), RT_MANIFEST);
- if (!manifest_resource) {
- fwprintf(stderr, L"Resource not found: %d\n", GetLastError());
- return 1;
- }
- HGLOBAL manifest_handle = LoadResource(exe, manifest_resource);
- if (!manifest_handle) {
- fwprintf(stderr, L"Error loading resource: %d\n", GetLastError());
- return 1;
- }
- LPVOID manifest_data = LockResource(manifest_handle);
- if (!manifest_data) {
- fwprintf(stderr, L"Error locking resource: %d\n", GetLastError());
- return 1;
- }
- DWORD manifest_len = SizeofResource(exe, manifest_resource);
- std::string manifest((char *)manifest_data, manifest_len);
- UnlockResource(manifest_handle);
- FreeResource(manifest_handle);
- FreeLibrary(exe);
-
- // Insert the activeCodePage element into the manifest at the end of the
- // windowsSettings element.
- // https://github.com/openjdk/jdk/blob/29882bfe7b7e76446a96862cd0a5e81c7e054415/src/java.base/windows/native/launcher/java.manifest#L43
- std::size_t insert_pos = manifest.find("");
- if (insert_pos == std::wstring::npos) {
- fwprintf(stderr, L"End tag not found in manifest:\n%hs", manifest.c_str());
- return 1;
- }
- std::string new_manifest = manifest.substr(0, insert_pos) +
- "UTF-8" +
- manifest.substr(insert_pos);
-
- // Write back the modified app manifest.
- HANDLE update_handle = BeginUpdateResourceW(argv[1], false);
- if (!update_handle) {
- fwprintf(stderr, L"Error opening file %ls for update: %d\n", argv[1],
- GetLastError());
- return 1;
- }
- if (!UpdateResourceA(update_handle, RT_MANIFEST, MAKEINTRESOURCE(1),
- MAKELANGID(LANG_NEUTRAL, SUBLANG_NEUTRAL),
- const_cast(new_manifest.c_str()),
- new_manifest.size())) {
- fwprintf(stderr, L"Error updating resource: %d\n", GetLastError());
- return 1;
- }
- if (!EndUpdateResourceW(update_handle, false)) {
- fwprintf(stderr, L"Error finalizing update: %d\n", GetLastError());
- return 1;
- }
-
- return 0;
-}
diff --git a/src/read_manifest.cc b/src/read_manifest.cc
new file mode 100644
index 00000000000000..fb7b0ee7f41dbc
--- /dev/null
+++ b/src/read_manifest.cc
@@ -0,0 +1,74 @@
+// Copyright 2024 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include
+
+#define WIN32_LEAN_AND_MEAN
+#include
+
+#include
+
+// Extracts the app manifest of a Windows executable and writes its raw bytes
+// to stdout.
+//
+// Takes the path of the executable as its only argument. Returns 0 on success,
+// or 1 after printing a diagnostic to stderr.
+int wmain(int argc, wchar_t *argv[]) {
+  if (argc != 2) {
+    fwprintf(stderr, L"Usage: %ls \n", argv[0]);
+    return 1;
+  }
+
+  // Read the app manifest (aka side-by-side or fusion manifest) from the
+  // executable, which requires loading it as a "module".
+  HMODULE exe = LoadLibraryExW(argv[1], nullptr, LOAD_LIBRARY_AS_DATAFILE);
+  if (!exe) {
+    fwprintf(stderr, L"Error loading file %ls: %d\n", argv[1], GetLastError());
+    return 1;
+  }
+  HRSRC manifest_resource = FindResourceA(exe, MAKEINTRESOURCE(1), RT_MANIFEST);
+  if (!manifest_resource) {
+    fwprintf(stderr, L"Resource not found: %d\n", GetLastError());
+    return 1;
+  }
+  HGLOBAL manifest_handle = LoadResource(exe, manifest_resource);
+  if (!manifest_handle) {
+    fwprintf(stderr, L"Error loading resource: %d\n", GetLastError());
+    return 1;
+  }
+  LPVOID manifest_data = LockResource(manifest_handle);
+  if (!manifest_data) {
+    fwprintf(stderr, L"Error locking resource: %d\n", GetLastError());
+    return 1;
+  }
+  DWORD manifest_len = SizeofResource(exe, manifest_resource);
+
+  // Write the manifest to stdout through the Win32 handle rather than the CRT:
+  // the CRT's stdout defaults to text mode, which would rewrite every LF as
+  // CRLF and thereby alter the manifest bytes.
+  HANDLE out = GetStdHandle(STD_OUTPUT_HANDLE);
+  DWORD written = 0;
+  if (!WriteFile(out, manifest_data, manifest_len, &written, nullptr) ||
+      written != manifest_len) {
+    fwprintf(stderr, L"Error writing manifest: %d\n", GetLastError());
+    return 1;
+  }
+
+  UnlockResource(manifest_handle);
+  FreeResource(manifest_handle);
+  FreeLibrary(exe);
+
+  return 0;
+}
diff --git a/src/test/shell/bazel/bazel_java_test.sh b/src/test/shell/bazel/bazel_java_test.sh
index 320f9ee44e8d6f..3a37900b9fb21b 100755
--- a/src/test/shell/bazel/bazel_java_test.sh
+++ b/src/test/shell/bazel/bazel_java_test.sh
@@ -2049,6 +2049,40 @@ EOF
bazel build //pkg:a >& $TEST_log || fail "build failed"
}
+# Verifies that the direct header compiler can build a java_library whose
+# source file name contains non-ASCII characters.
+function test_header_compiler_direct_supports_unicode() {
+  # NOTE(review): is_windows is defined outside this function. If it holds the
+  # string "true" or "false", a bare "$is_windows" inside [[ ]] is non-empty
+  # either way and would skip the test on every platform, so treat only a
+  # non-empty value other than "false" as Windows. Confirm against the
+  # definition of is_windows.
+  if [[ "${JAVA_TOOLS_ZIP}" == released \
+      && -n "${is_windows:-}" && "${is_windows:-}" != false ]]; then
+    # TODO: Enable test after the next java_tools release.
+    return 0
+  fi
+
+  # JVMs on macOS always support UTF-8 since JEP 400.
+  # Windows releases of Turbine are built on a machine with system code page set
+  # to UTF-8 so that Graal picks up the correct sun.jnu.encoding value *and*
+  # have an app manifest patched in to set the system code page to UTF-8 at
+  # runtime.
+  if [[ "$(uname -s)" == "Linux" ]]; then
+    export LC_ALL=C.UTF-8
+    if [[ $(locale charmap) != "UTF-8" ]]; then
+      echo "Skipping test due to missing UTF-8 locale"
+      return 0
+    fi
+  fi
+  local -r unicode="äöüÄÖÜß🌱"
+  mkdir -p pkg
+  # Unquoted heredoc delimiter: ${unicode} must be expanded in the BUILD file.
+  cat << EOF > pkg/BUILD
+java_library(name = "a", srcs = ["A.java"], deps = [":b"])
+java_library(name = "b", srcs = ["${unicode}.java"])
+EOF
+  cat << 'EOF' > pkg/A.java
+public class A extends B {}
+EOF
+  cat << 'EOF' > "pkg/${unicode}.java"
+class B {}
+EOF
+
+  bazel build //pkg:a //pkg:b >& "$TEST_log" || fail "build failed"
+}
+
function test_sandboxed_multiplexing() {
mkdir -p pkg
cat << 'EOF' > pkg/BUILD
diff --git a/src/write_manifest.cc b/src/write_manifest.cc
new file mode 100644
index 00000000000000..45b9a8f21b1e46
--- /dev/null
+++ b/src/write_manifest.cc
@@ -0,0 +1,80 @@
+// Copyright 2024 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include
+
+#define WIN32_LEAN_AND_MEAN
+#include
+
+#include
+
+// Replaces the app manifest of a Windows executable with the bytes read from
+// stdin.
+//
+// Takes the path of the executable as its only argument. Returns 0 on success,
+// or 1 after printing a diagnostic to stderr.
+int wmain(int argc, wchar_t *argv[]) {
+  if (argc != 2) {
+    fwprintf(stderr, L"Usage: %ls \n", argv[0]);
+    return 1;
+  }
+
+  // Read the new manifest through the Win32 handle rather than the CRT: the
+  // CRT's stdin defaults to text mode, which would collapse CRLF to LF and
+  // treat Ctrl-Z as end of input.
+  HANDLE in = GetStdHandle(STD_INPUT_HANDLE);
+  std::string new_manifest;
+  char buf[4096];
+  for (;;) {
+    DWORD n = 0;
+    if (!ReadFile(in, buf, sizeof(buf), &n, nullptr)) {
+      DWORD err = GetLastError();
+      // A pipe whose write end has been closed reports end of input this way.
+      if (err == ERROR_BROKEN_PIPE) break;
+      fwprintf(stderr, L"Error reading manifest: %d\n", err);
+      return 1;
+    }
+    if (n == 0) break;  // End of input on a regular file.
+    new_manifest.append(buf, n);
+  }
+  // An empty manifest most likely means an upstream command in the pipeline
+  // failed; refuse to overwrite the existing manifest with nothing.
+  if (new_manifest.empty()) {
+    fwprintf(stderr, L"Refusing to write an empty manifest\n");
+    return 1;
+  }
+
+  HANDLE update_handle = BeginUpdateResourceW(argv[1], false);
+  if (!update_handle) {
+    fwprintf(stderr, L"Error opening file %ls for update: %d\n", argv[1],
+             GetLastError());
+    return 1;
+  }
+  if (!UpdateResourceA(update_handle, RT_MANIFEST, MAKEINTRESOURCE(1),
+                       MAKELANGID(LANG_NEUTRAL, SUBLANG_NEUTRAL),
+                       const_cast<char *>(new_manifest.c_str()),
+                       static_cast<DWORD>(new_manifest.size()))) {
+    fwprintf(stderr, L"Error updating resource: %d\n", GetLastError());
+    // Discard the pending update so the update handle is not left open.
+    EndUpdateResourceW(update_handle, true);
+    return 1;
+  }
+  if (!EndUpdateResourceW(update_handle, false)) {
+    fwprintf(stderr, L"Error finalizing update: %d\n", GetLastError());
+    return 1;
+  }
+
+  return 0;
+}