Skip to content

Commit ecdec17

Browse files
committed
std: simplify utf8ToUtf16Le
Also faster: on my machine, unicode/throughput_test.zig now gives, e.g.:
> original utf8ToUtf16Le: elapsed: 1048 ns (0 ms)
> new utf8ToUtf16Le: elapsed: 971 ns (0 ms)
1 parent 5843a6e commit ecdec17

File tree

2 files changed

+15
-27
lines changed

2 files changed

+15
-27
lines changed

lib/std/os/windows.zig

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1019,7 +1019,7 @@ pub fn sliceToPrefixedSuffixedFileW(s: []const u8, comptime suffix: []const u16)
10191019
mem.copy(u16, result[0..], &prefix);
10201020
break :blk prefix.len;
10211021
};
1022-
const end_index = start_index + try std.unicode.utf8ToUtf16Le(result[start_index..], s);
1022+
const end_index = start_index + (std.unicode.utf8ToUtf16Le(result[start_index..], s) catch return error.InvalidUtf8);
10231023
if (end_index + suffix.len > result.len) return error.NameTooLong;
10241024
mem.copy(u16, result[end_index..], suffix);
10251025
result[end_index + suffix.len] = 0;

lib/std/unicode.zig

Lines changed: 14 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -576,33 +576,21 @@ pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) !usize {
576576
var dest_i: usize = 0;
577577
var src_i: usize = 0;
578578
while (src_i < utf8.len) {
579-
const byte = utf8[src_i];
580-
const n = @clz(u8, ~byte);
581-
switch (n) {
582-
0 => {
583-
utf16le[dest_i] = byte;
584-
dest_i += 1;
585-
src_i += 1;
586-
continue;
587-
},
588-
2, 3, 4 => {
589-
const next_src_i = src_i + n;
590-
const codepoint = utf8Decode(utf8[src_i..next_src_i]) catch return error.InvalidUtf8;
591-
if (codepoint < 0x10000) {
592-
const short = @intCast(u16, codepoint);
593-
utf16le[dest_i] = mem.nativeToLittle(u16, short);
594-
dest_i += 1;
595-
} else {
596-
const high = @intCast(u16, (codepoint - 0x10000) >> 10) + 0xD800;
597-
const low = @intCast(u16, codepoint & 0x3FF) + 0xDC00;
598-
utf16le[dest_i] = mem.nativeToLittle(u16, high);
599-
utf16le[dest_i + 1] = mem.nativeToLittle(u16, low);
600-
dest_i += 2;
601-
}
602-
src_i = next_src_i;
603-
},
604-
else => return error.InvalidUtf8,
579+
const n = try utf8ByteSequenceLength(utf8[src_i]);
580+
const next_src_i = src_i + n;
581+
const codepoint = try utf8Decode(utf8[src_i..next_src_i]);
582+
if (codepoint < 0x10000) {
583+
const short = @intCast(u16, codepoint);
584+
utf16le[dest_i] = mem.nativeToLittle(u16, short);
585+
dest_i += 1;
586+
} else {
587+
const high = @intCast(u16, (codepoint - 0x10000) >> 10) + 0xD800;
588+
const low = @intCast(u16, codepoint & 0x3FF) + 0xDC00;
589+
utf16le[dest_i] = mem.nativeToLittle(u16, high);
590+
utf16le[dest_i + 1] = mem.nativeToLittle(u16, low);
591+
dest_i += 2;
605592
}
593+
src_i = next_src_i;
606594
}
607595
return dest_i;
608596
}

0 commit comments

Comments
 (0)