Skip to content

Commit 68b8791

Browse files
committed
Fix handling of Windows (WTF-16) and WASI (UTF-8) paths
Windows paths now use WTF-16 <-> WTF-8 conversion everywhere, which is lossless. Previously, conversion of ill-formed UTF-16 paths would either fail or invoke illegal behavior. WASI paths must be valid UTF-8, and the relevant function calls have been updated to handle the possibility of failure when a path is not encoded as, or cannot be encoded as, valid UTF-8. Closes #18694. Closes #1774. Closes #2565.
1 parent f6b6b8a commit 68b8791

20 files changed

+1000
-1109
lines changed

deps/aro/aro/Compilation.zig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ pub const Environment = struct {
6969
const val: ?[]const u8 = std.process.getEnvVarOwned(allocator, env_var_name) catch |err| switch (err) {
7070
error.OutOfMemory => |e| return e,
7171
error.EnvironmentVariableNotFound => null,
72-
error.InvalidUtf8 => null,
72+
error.InvalidWtf8 => null,
7373
};
7474
@field(env, field.name) = val;
7575
}

deps/aro/aro/Driver.zig

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,8 @@ pub fn errorDescription(e: anyerror) []const u8 {
523523
error.NotDir => "is not a directory",
524524
error.NotOpenForReading => "file is not open for reading",
525525
error.NotOpenForWriting => "file is not open for writing",
526-
error.InvalidUtf8 => "input is not valid UTF-8",
526+
error.InvalidUtf8 => "path is not valid UTF-8",
527+
error.InvalidWtf8 => "path is not valid WTF-8",
527528
error.FileBusy => "file is busy",
528529
error.NameTooLong => "file name is too long",
529530
error.AccessDenied => "access denied",

lib/std/Build/Cache.zig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ fn findPrefixResolved(cache: *const Cache, resolved_path: []u8) !PrefixedPath {
162162
fn getPrefixSubpath(allocator: Allocator, prefix: []const u8, path: []u8) ![]u8 {
163163
const relative = try std.fs.path.relative(allocator, prefix, path);
164164
errdefer allocator.free(relative);
165-
var component_iterator = std.fs.path.NativeUtf8ComponentIterator.init(relative) catch {
165+
var component_iterator = std.fs.path.NativeComponentIterator.init(relative) catch {
166166
return error.NotASubPath;
167167
};
168168
if (component_iterator.root() != null) {

lib/std/Thread.zig

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ pub fn setName(self: Thread, name: []const u8) SetNameError!void {
9191
},
9292
.windows => {
9393
var buf: [max_name_len]u16 = undefined;
94-
const len = try std.unicode.utf8ToUtf16Le(&buf, name);
94+
const len = try std.unicode.wtf8ToWtf16Le(&buf, name);
9595
const byte_len = math.cast(c_ushort, len * 2) orelse return error.NameTooLong;
9696

9797
// Note: NT allocates its own copy, no use-after-free here.
@@ -157,17 +157,12 @@ pub fn setName(self: Thread, name: []const u8) SetNameError!void {
157157
}
158158

159159
pub const GetNameError = error{
160-
// For Windows, the name is converted from UTF16 to UTF8
161-
CodepointTooLarge,
162-
Utf8CannotEncodeSurrogateHalf,
163-
DanglingSurrogateHalf,
164-
ExpectedSecondSurrogateHalf,
165-
UnexpectedSecondSurrogateHalf,
166-
167160
Unsupported,
168161
Unexpected,
169162
} || os.PrctlError || os.ReadError || std.fs.File.OpenError || std.fmt.BufPrintError;
170163

164+
/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
165+
/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
171166
pub fn getName(self: Thread, buffer_ptr: *[max_name_len:0]u8) GetNameError!?[]const u8 {
172167
buffer_ptr[max_name_len] = 0;
173168
var buffer: [:0]u8 = buffer_ptr;
@@ -213,7 +208,7 @@ pub fn getName(self: Thread, buffer_ptr: *[max_name_len:0]u8) GetNameError!?[]co
213208
)) {
214209
.SUCCESS => {
215210
const string = @as(*const os.windows.UNICODE_STRING, @ptrCast(&buf));
216-
const len = try std.unicode.utf16LeToUtf8(buffer, string.Buffer[0 .. string.Length / 2]);
211+
const len = std.unicode.wtf16LeToWtf8(buffer, string.Buffer[0 .. string.Length / 2]);
217212
return if (len > 0) buffer[0..len] else null;
218213
},
219214
.NOT_IMPLEMENTED => return error.Unsupported,

lib/std/child_process.zig

Lines changed: 21 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -129,10 +129,9 @@ pub const ChildProcess = struct {
129129
/// POSIX-only. `StdIo.Ignore` was selected and opening `/dev/null` returned ENODEV.
130130
NoDevice,
131131

132-
/// Windows-only. One of:
133-
/// * `cwd` was provided and it could not be re-encoded into UTF16LE, or
134-
/// * The `PATH` or `PATHEXT` environment variable contained invalid UTF-8.
135-
InvalidUtf8,
132+
/// Windows-only. `cwd` or `argv` was provided and it was invalid WTF-8.
133+
/// https://simonsapin.github.io/wtf-8/
134+
InvalidWtf8,
136135

137136
/// Windows-only. `cwd` was provided, but the path did not exist when spawning the child process.
138137
CurrentWorkingDirectoryUnlinked,
@@ -767,16 +766,16 @@ pub const ChildProcess = struct {
767766
};
768767
var piProcInfo: windows.PROCESS_INFORMATION = undefined;
769768

770-
const cwd_w = if (self.cwd) |cwd| try unicode.utf8ToUtf16LeAllocZ(self.allocator, cwd) else null;
769+
const cwd_w = if (self.cwd) |cwd| try unicode.wtf8ToWtf16LeAllocZ(self.allocator, cwd) else null;
771770
defer if (cwd_w) |cwd| self.allocator.free(cwd);
772771
const cwd_w_ptr = if (cwd_w) |cwd| cwd.ptr else null;
773772

774773
const maybe_envp_buf = if (self.env_map) |env_map| try createWindowsEnvBlock(self.allocator, env_map) else null;
775774
defer if (maybe_envp_buf) |envp_buf| self.allocator.free(envp_buf);
776775
const envp_ptr = if (maybe_envp_buf) |envp_buf| envp_buf.ptr else null;
777776

778-
const app_name_utf8 = self.argv[0];
779-
const app_name_is_absolute = fs.path.isAbsolute(app_name_utf8);
777+
const app_name_wtf8 = self.argv[0];
778+
const app_name_is_absolute = fs.path.isAbsolute(app_name_wtf8);
780779

781780
// the cwd set in ChildProcess is in effect when choosing the executable path
782781
// to match posix semantics
@@ -785,11 +784,11 @@ pub const ChildProcess = struct {
785784
// If the app name is absolute, then we need to use its dirname as the cwd
786785
if (app_name_is_absolute) {
787786
cwd_path_w_needs_free = true;
788-
const dir = fs.path.dirname(app_name_utf8).?;
789-
break :x try unicode.utf8ToUtf16LeAllocZ(self.allocator, dir);
787+
const dir = fs.path.dirname(app_name_wtf8).?;
788+
break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, dir);
790789
} else if (self.cwd) |cwd| {
791790
cwd_path_w_needs_free = true;
792-
break :x try unicode.utf8ToUtf16LeAllocZ(self.allocator, cwd);
791+
break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, cwd);
793792
} else {
794793
break :x &[_:0]u16{}; // empty for cwd
795794
}
@@ -800,19 +799,19 @@ pub const ChildProcess = struct {
800799
// into the basename and dirname and use the dirname as an addition to the cwd
801800
// path. This is because NtQueryDirectoryFile cannot accept FileName params with
802801
// path separators.
803-
const app_basename_utf8 = fs.path.basename(app_name_utf8);
802+
const app_basename_wtf8 = fs.path.basename(app_name_wtf8);
804803
// If the app name is absolute, then the cwd will already have the app's dirname in it,
805804
// so only populate app_dirname if app name is a relative path with > 0 path separators.
806-
const maybe_app_dirname_utf8 = if (!app_name_is_absolute) fs.path.dirname(app_name_utf8) else null;
805+
const maybe_app_dirname_wtf8 = if (!app_name_is_absolute) fs.path.dirname(app_name_wtf8) else null;
807806
const app_dirname_w: ?[:0]u16 = x: {
808-
if (maybe_app_dirname_utf8) |app_dirname_utf8| {
809-
break :x try unicode.utf8ToUtf16LeAllocZ(self.allocator, app_dirname_utf8);
807+
if (maybe_app_dirname_wtf8) |app_dirname_wtf8| {
808+
break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, app_dirname_wtf8);
810809
}
811810
break :x null;
812811
};
813812
defer if (app_dirname_w != null) self.allocator.free(app_dirname_w.?);
814813

815-
const app_name_w = try unicode.utf8ToUtf16LeAllocZ(self.allocator, app_basename_utf8);
814+
const app_name_w = try unicode.wtf8ToWtf16LeAllocZ(self.allocator, app_basename_wtf8);
816815
defer self.allocator.free(app_name_w);
817816

818817
const cmd_line_w = argvToCommandLineWindows(self.allocator, self.argv) catch |err| switch (err) {
@@ -1173,7 +1172,7 @@ const CreateProcessSupportedExtension = enum {
11731172
exe,
11741173
};
11751174

1176-
/// Case-insensitive UTF-16 lookup
1175+
/// Case-insensitive WTF-16 lookup
11771176
fn windowsCreateProcessSupportsExtension(ext: []const u16) ?CreateProcessSupportedExtension {
11781177
if (ext.len != 4) return null;
11791178
const State = enum {
@@ -1237,7 +1236,7 @@ test "windowsCreateProcessSupportsExtension" {
12371236
try std.testing.expect(windowsCreateProcessSupportsExtension(&[_]u16{ '.', 'e', 'X', 'e', 'c' }) == null);
12381237
}
12391238

1240-
pub const ArgvToCommandLineError = error{ OutOfMemory, InvalidUtf8, InvalidArg0 };
1239+
pub const ArgvToCommandLineError = error{ OutOfMemory, InvalidWtf8, InvalidArg0 };
12411240

12421241
/// Serializes `argv` to a Windows command-line string suitable for passing to a child process and
12431242
/// parsing by the `CommandLineToArgvW` algorithm. The caller owns the returned slice.
@@ -1320,7 +1319,7 @@ pub fn argvToCommandLineWindows(
13201319
}
13211320
}
13221321

1323-
return try unicode.utf8ToUtf16LeAllocZ(allocator, buf.items);
1322+
return try unicode.wtf8ToWtf16LeAllocZ(allocator, buf.items);
13241323
}
13251324

13261325
test "argvToCommandLineWindows" {
@@ -1386,7 +1385,7 @@ fn testArgvToCommandLineWindows(argv: []const []const u8, expected_cmd_line: []c
13861385
const cmd_line_w = try argvToCommandLineWindows(std.testing.allocator, argv);
13871386
defer std.testing.allocator.free(cmd_line_w);
13881387

1389-
const cmd_line = try unicode.utf16LeToUtf8Alloc(std.testing.allocator, cmd_line_w);
1388+
const cmd_line = try unicode.wtf16LeToWtf8Alloc(std.testing.allocator, cmd_line_w);
13901389
defer std.testing.allocator.free(cmd_line);
13911390

13921391
try std.testing.expectEqualStrings(expected_cmd_line, cmd_line);
@@ -1424,7 +1423,7 @@ fn windowsMakeAsyncPipe(rd: *?windows.HANDLE, wr: *?windows.HANDLE, sattr: *cons
14241423
"\\\\.\\pipe\\zig-childprocess-{d}-{d}",
14251424
.{ windows.kernel32.GetCurrentProcessId(), pipe_name_counter.fetchAdd(1, .Monotonic) },
14261425
) catch unreachable;
1427-
const len = std.unicode.utf8ToUtf16Le(&tmp_bufw, pipe_path) catch unreachable;
1426+
const len = std.unicode.wtf8ToWtf16Le(&tmp_bufw, pipe_path) catch unreachable;
14281427
tmp_bufw[len] = 0;
14291428
break :blk tmp_bufw[0..len :0];
14301429
};
@@ -1521,10 +1520,10 @@ pub fn createWindowsEnvBlock(allocator: mem.Allocator, env_map: *const EnvMap) !
15211520
var it = env_map.iterator();
15221521
var i: usize = 0;
15231522
while (it.next()) |pair| {
1524-
i += try unicode.utf8ToUtf16Le(result[i..], pair.key_ptr.*);
1523+
i += try unicode.wtf8ToWtf16Le(result[i..], pair.key_ptr.*);
15251524
result[i] = '=';
15261525
i += 1;
1527-
i += try unicode.utf8ToUtf16Le(result[i..], pair.value_ptr.*);
1526+
i += try unicode.wtf8ToWtf16Le(result[i..], pair.value_ptr.*);
15281527
result[i] = 0;
15291528
i += 1;
15301529
}

0 commit comments

Comments
 (0)