diff --git a/src/BUILD b/src/BUILD index e80014d9796e96..95cd961d3064a9 100644 --- a/src/BUILD +++ b/src/BUILD @@ -180,16 +180,22 @@ filegroup( visibility = ["//src/test/shell/bazel:__pkg__"], ) -# This program patches the app manifest of the java.exe launcher to force its -# active code page to UTF-8 on Windows 1903 and later, which is required for -# proper support of Unicode characters outside the system code page. -# The JDK currently (as of JDK 23) doesn't support this natively: -# https://mail.openjdk.org/pipermail/core-libs-dev/2024-November/133773.html +# Reads the app manifest of a Windows executable. cc_binary( - name = "patch_java_manifest_for_utf8", - srcs = ["patch_java_manifest_for_utf8.cc"], + name = "read_manifest", + srcs = ["read_manifest.cc"], tags = ["manual"], target_compatible_with = ["@platforms//os:windows"], + visibility = ["//src/java_tools:__subpackages__"], +) + +# Updates the app manifest of a Windows executable. +cc_binary( + name = "write_manifest", + srcs = ["write_manifest.cc"], + tags = ["manual"], + target_compatible_with = ["@platforms//os:windows"], + visibility = ["//src/java_tools:__subpackages__"], ) sh_binary( @@ -197,7 +203,8 @@ sh_binary( srcs = ["minimize_jdk.sh"], data = select({ "@platforms//os:windows": [ - ":patch_java_manifest_for_utf8", + ":read_manifest", + ":write_manifest", ], "//conditions:default": [], }), diff --git a/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/BUILD b/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/BUILD index 4abefcb4b2b8d9..27e0d9588ce90a 100644 --- a/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/BUILD +++ b/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/BUILD @@ -1,5 +1,5 @@ load("@rules_graalvm//graalvm:defs.bzl", "native_image") -load("@rules_java//java:defs.bzl", "java_binary") +load("@rules_java//java:defs.bzl", "java_binary", "java_library") package( default_applicable_licenses = ["//:license"], @@ 
-26,13 +26,21 @@ java_binary( runtime_deps = [":turbine_deps"], ) -native_image( +alias( name = "turbine_direct_graal", + actual = select({ + "@platforms//os:windows": ":turbine_direct_graal_with_app_manifest", + "//conditions:default": ":turbine_direct_graal_unpatched", + }), +) + +native_image( + name = "turbine_direct_graal_unpatched", executable_name = select({ # TODO(cushon): restore .exe suffix on windows # see https://github.com/sgammon/rules_graalvm/issues/324 - "@bazel_tools//src/conditions:windows": "%target%", - "//conditions:default": "%target%", + "@bazel_tools//src/conditions:windows": "turbine_direct_graal_unpatched", + "//conditions:default": "turbine_direct_graal", }), extra_args = [ # Workaround for https://github.com/oracle/graal/issues/4757. @@ -68,6 +76,26 @@ native_image( deps = [":turbine_deps"], ) +# On Windows, add an app manifest to the binary to force it to run with a UTF-8 +# code page. It is built with one, but without the app manifest it will not be +# able to use UTF-8 for filesystem operations. +# https://github.com/oracle/graal/issues/10237 +genrule( + name = "turbine_direct_graal_with_app_manifest", + srcs = [ + ":turbine_direct_graal_unpatched", + "turbine_direct_graal.manifest", + ], + outs = ["turbine_direct_graal.exe"], + cmd = """\ +cp $(location :turbine_direct_graal_unpatched) $@ +chmod +w $@ +cat $(location turbine_direct_graal.manifest) | $(location //src:write_manifest) $@ +""", + target_compatible_with = ["@platforms//os:windows"], + tools = ["//src:write_manifest"], +) + # Run with -c opt. 
sh_binary( name = "turbine_benchmark", diff --git a/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/turbine_direct_graal.manifest b/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/turbine_direct_graal.manifest new file mode 100644 index 00000000000000..765ea25b7e87ee --- /dev/null +++ b/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/turbine_direct_graal.manifest @@ -0,0 +1,19 @@ + + + +Turbine + + + + UTF-8 + + + + diff --git a/src/minimize_jdk.sh b/src/minimize_jdk.sh index 4e5d68b6b2e4ec..638a114e416789 100755 --- a/src/minimize_jdk.sh +++ b/src/minimize_jdk.sh @@ -61,7 +61,6 @@ fi UNAME=$(uname -s | tr 'A-Z' 'a-z') if [[ "$UNAME" =~ msys_nt* ]]; then - set -x mkdir "tmp.$$" cd "tmp.$$" unzip -q "../$fulljdk" @@ -74,7 +73,14 @@ if [[ "$UNAME" =~ msys_nt* ]]; then --vm=server --strip-debug --no-man-pages \ --add-options=' --enable-native-access=ALL-UNNAMED' \ --output reduced - "$(rlocation "io_bazel/src/patch_java_manifest_for_utf8.exe")" reduced/bin/java.exe + # Patch the app manifest of the java.exe launcher to force its active code + # page to UTF-8 on Windows 1903 and later, which is required for proper + # support of Unicode characters outside the system code page. + # The JDK currently (as of JDK 23) doesn't support this natively: + # https://mail.openjdk.org/pipermail/core-libs-dev/2024-November/133773.html + "$(rlocation io_bazel/src/read_manifest.exe)" reduced/bin/java.exe \ + | sed 's||UTF-8&|' \ + | "$(rlocation io_bazel/src/write_manifest.exe)" reduced/bin/java.exe cp $DOCS legal/java.base/ASSEMBLY_EXCEPTION \ reduced/ # These are necessary for --host_jvm_debug to work. diff --git a/src/patch_java_manifest_for_utf8.cc b/src/patch_java_manifest_for_utf8.cc deleted file mode 100644 index cc04c07761abc6..00000000000000 --- a/src/patch_java_manifest_for_utf8.cc +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2024 The Bazel Authors. All rights reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#define WIN32_LEAN_AND_MEAN -#include - -#include - -// This program patches the app manifest of the java.exe launcher to force its -// active code page to UTF-8 on Windows 1903 and later. -// https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page#set-a-process-code-page-to-utf-8 -// -// This is necessary because the launcher sets sun.jnu.encoding to the system -// code page, which by default is a legacy code page such as Cp1252 on Windows. -// This causes the JVM to be unable to interact with files whose paths contain -// Unicode characters not representable in the system code page, as well as -// command-line arguments and environment variables containing such characters. -// -// Usage in the libjava.dll code: -// https://github.com/openjdk/jdk/blob/e7f0bf11ff0e89b6b156d5e88ca3771c706aa46a/src/java.base/windows/native/libjava/java_props_md.c#L63-L65 -int wmain(int argc, wchar_t *argv[]) { - if (argc != 2) { - fwprintf(stderr, L"Usage: %ls \n", argv[0]); - return 1; - } - - // Read the app manifest (aka side-by-side or fusion manifest) from the - // executable, which requires loading it as a "module". 
- HMODULE exe = LoadLibraryExW(argv[1], nullptr, LOAD_LIBRARY_AS_DATAFILE); - if (!exe) { - fwprintf(stderr, L"Error loading file %ls: %d\n", argv[1], GetLastError()); - return 1; - } - HRSRC manifest_resource = FindResourceA(exe, MAKEINTRESOURCE(1), RT_MANIFEST); - if (!manifest_resource) { - fwprintf(stderr, L"Resource not found: %d\n", GetLastError()); - return 1; - } - HGLOBAL manifest_handle = LoadResource(exe, manifest_resource); - if (!manifest_handle) { - fwprintf(stderr, L"Error loading resource: %d\n", GetLastError()); - return 1; - } - LPVOID manifest_data = LockResource(manifest_handle); - if (!manifest_data) { - fwprintf(stderr, L"Error locking resource: %d\n", GetLastError()); - return 1; - } - DWORD manifest_len = SizeofResource(exe, manifest_resource); - std::string manifest((char *)manifest_data, manifest_len); - UnlockResource(manifest_handle); - FreeResource(manifest_handle); - FreeLibrary(exe); - - // Insert the activeCodePage element into the manifest at the end of the - // windowsSettings element. - // https://github.com/openjdk/jdk/blob/29882bfe7b7e76446a96862cd0a5e81c7e054415/src/java.base/windows/native/launcher/java.manifest#L43 - std::size_t insert_pos = manifest.find(""); - if (insert_pos == std::wstring::npos) { - fwprintf(stderr, L"End tag not found in manifest:\n%hs", manifest.c_str()); - return 1; - } - std::string new_manifest = manifest.substr(0, insert_pos) + - "UTF-8" + - manifest.substr(insert_pos); - - // Write back the modified app manifest. 
- HANDLE update_handle = BeginUpdateResourceW(argv[1], false); - if (!update_handle) { - fwprintf(stderr, L"Error opening file %ls for update: %d\n", argv[1], - GetLastError()); - return 1; - } - if (!UpdateResourceA(update_handle, RT_MANIFEST, MAKEINTRESOURCE(1), - MAKELANGID(LANG_NEUTRAL, SUBLANG_NEUTRAL), - const_cast(new_manifest.c_str()), - new_manifest.size())) { - fwprintf(stderr, L"Error updating resource: %d\n", GetLastError()); - return 1; - } - if (!EndUpdateResourceW(update_handle, false)) { - fwprintf(stderr, L"Error finalizing update: %d\n", GetLastError()); - return 1; - } - - return 0; -} diff --git a/src/read_manifest.cc b/src/read_manifest.cc new file mode 100644 index 00000000000000..fb7b0ee7f41dbc --- /dev/null +++ b/src/read_manifest.cc @@ -0,0 +1,62 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#define WIN32_LEAN_AND_MEAN +#include + +#include + +// Extracts the app manifest of a Windows executable and prints it to stdout. +int wmain(int argc, wchar_t *argv[]) { + if (argc != 2) { + fwprintf(stderr, L"Usage: %ls \n", argv[0]); + return 1; + } + + // Read the app manifest (aka side-by-side or fusion manifest) from the + // executable, which requires loading it as a "module". 
+ HMODULE exe = LoadLibraryExW(argv[1], nullptr, LOAD_LIBRARY_AS_DATAFILE); + if (!exe) { + fwprintf(stderr, L"Error loading file %ls: %d\n", argv[1], GetLastError()); + return 1; + } + HRSRC manifest_resource = FindResourceA(exe, MAKEINTRESOURCE(1), RT_MANIFEST); + if (!manifest_resource) { + fwprintf(stderr, L"Resource not found: %d\n", GetLastError()); + return 1; + } + HGLOBAL manifest_handle = LoadResource(exe, manifest_resource); + if (!manifest_handle) { + fwprintf(stderr, L"Error loading resource: %d\n", GetLastError()); + return 1; + } + LPVOID manifest_data = LockResource(manifest_handle); + if (!manifest_data) { + fwprintf(stderr, L"Error locking resource: %d\n", GetLastError()); + return 1; + } + DWORD manifest_len = SizeofResource(exe, manifest_resource); + + // Write the manifest to stdout. + fwrite(manifest_data, 1, manifest_len, stdout); + + UnlockResource(manifest_handle); + FreeResource(manifest_handle); + FreeLibrary(exe); + + return 0; +} diff --git a/src/test/shell/bazel/bazel_java_test.sh b/src/test/shell/bazel/bazel_java_test.sh index 320f9ee44e8d6f..3a37900b9fb21b 100755 --- a/src/test/shell/bazel/bazel_java_test.sh +++ b/src/test/shell/bazel/bazel_java_test.sh @@ -2049,6 +2049,40 @@ EOF bazel build //pkg:a >& $TEST_log || fail "build failed" } +function test_header_compiler_direct_supports_unicode() { + if [[ "${JAVA_TOOLS_ZIP}" == released && "$is_windows" ]]; then + # TODO: Enable test after the next java_tools release. + return 0 + fi + + # JVMs on macOS always support UTF-8 since JEP 400. + # Windows releases of Turbine are built on a machine with system code page set + # to UTF-8 so that Graal picks up the correct sun.jnu.encoding value *and* + # have an app manifest patched in to set the system code page to UTF-8 at + # runtime. 
+ if [[ "$(uname -s)" == "Linux" ]]; then + export LC_ALL=C.UTF-8 + if [[ $(locale charmap) != "UTF-8" ]]; then + echo "Skipping test due to missing UTF-8 locale" + return 0 + fi + fi + local -r unicode="äöüÄÖÜß🌱" + mkdir -p pkg + cat << EOF > pkg/BUILD +java_library(name = "a", srcs = ["A.java"], deps = [":b"]) +java_library(name = "b", srcs = ["${unicode}.java"]) +EOF + cat << 'EOF' > pkg/A.java +public class A extends B {} +EOF + cat << 'EOF' > "pkg/${unicode}.java" +class B {} +EOF + + bazel build //pkg:a //pkg:b >& $TEST_log || fail "build failed" +} + function test_sandboxed_multiplexing() { mkdir -p pkg cat << 'EOF' > pkg/BUILD diff --git a/src/write_manifest.cc b/src/write_manifest.cc new file mode 100644 index 00000000000000..45b9a8f21b1e46 --- /dev/null +++ b/src/write_manifest.cc @@ -0,0 +1,56 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#define WIN32_LEAN_AND_MEAN +#include + +#include + +// Writes a new app manifest to a Windows executable. 
+// Replaces the app manifest resource (type RT_MANIFEST, ID 1, neutral
+// language) of the executable named by the single command-line argument with
+// the bytes read from stdin.
+//
+// Returns 0 on success. On any failure a diagnostic is printed to stderr and
+// 1 is returned.
+//
+// NOTE(review): stdin is read in the C runtime's default translation mode; if
+// that is text mode, CRLF sequences are rewritten while reading. Confirm that
+// this matches how the manifest is produced (e.g. by read_manifest).
+int wmain(int argc, wchar_t *argv[]) {
+  if (argc != 2) {
+    fwprintf(stderr,
+             L"Usage: %ls EXECUTABLE (the new manifest is read from stdin)\n",
+             argv[0]);
+    return 1;
+  }
+
+  // Read the complete manifest before opening the executable for update, so
+  // that a broken input never results in a half-finished update.
+  std::string new_manifest;
+  char buf[4096];
+  size_t n;
+  while ((n = fread(buf, 1, sizeof(buf), stdin)) > 0) {
+    new_manifest.append(buf, n);
+  }
+  if (ferror(stdin)) {
+    fwprintf(stderr, L"Error reading the manifest from stdin\n");
+    return 1;
+  }
+  // An empty stdin usually means that whatever produced the manifest failed
+  // earlier in the pipeline; do not replace the existing manifest with an
+  // empty resource in that case.
+  if (new_manifest.empty()) {
+    fwprintf(stderr, L"Refusing to write an empty manifest\n");
+    return 1;
+  }
+  // UpdateResource takes the data size as a DWORD.
+  if (new_manifest.size() > MAXDWORD) {
+    fwprintf(stderr, L"Manifest is too large\n");
+    return 1;
+  }
+
+  HANDLE update_handle = BeginUpdateResourceW(argv[1], FALSE);
+  if (!update_handle) {
+    const DWORD error = GetLastError();
+    fwprintf(stderr, L"Error opening file %ls for update: %lu\n", argv[1],
+             error);
+    return 1;
+  }
+  if (!UpdateResourceA(update_handle, RT_MANIFEST, MAKEINTRESOURCE(1),
+                       MAKELANGID(LANG_NEUTRAL, SUBLANG_NEUTRAL),
+                       &new_manifest[0],
+                       static_cast<DWORD>(new_manifest.size()))) {
+    // Save the error code first: discarding the update below may change it.
+    const DWORD error = GetLastError();
+    // Release the update handle without writing anything to the executable.
+    EndUpdateResourceW(update_handle, TRUE);
+    fwprintf(stderr, L"Error updating resource: %lu\n", error);
+    return 1;
+  }
+  if (!EndUpdateResourceW(update_handle, FALSE)) {
+    const DWORD error = GetLastError();
+    fwprintf(stderr, L"Error finalizing update: %lu\n", error);
+    return 1;
+  }
+
+  return 0;
+}