Skip to content

Commit ab60654

Browse files
committed
std: simplify utf8ToUtf16Le
Also faster: on my machine, unicode/throughput_test.zig now gives, e.g.:
> original utf8ToUtf16Le: elapsed: 1048 ns (0 ms)
> new utf8ToUtf16Le: elapsed: 971 ns (0 ms)
1 parent 5843a6e commit ab60654

File tree

1 file changed

+14
-26
lines changed

1 file changed

+14
-26
lines changed

lib/std/unicode.zig

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -576,33 +576,21 @@ pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) !usize {
576576
var dest_i: usize = 0;
577577
var src_i: usize = 0;
578578
while (src_i < utf8.len) {
579-
const byte = utf8[src_i];
580-
const n = @clz(u8, ~byte);
581-
switch (n) {
582-
0 => {
583-
utf16le[dest_i] = byte;
584-
dest_i += 1;
585-
src_i += 1;
586-
continue;
587-
},
588-
2, 3, 4 => {
589-
const next_src_i = src_i + n;
590-
const codepoint = utf8Decode(utf8[src_i..next_src_i]) catch return error.InvalidUtf8;
591-
if (codepoint < 0x10000) {
592-
const short = @intCast(u16, codepoint);
593-
utf16le[dest_i] = mem.nativeToLittle(u16, short);
594-
dest_i += 1;
595-
} else {
596-
const high = @intCast(u16, (codepoint - 0x10000) >> 10) + 0xD800;
597-
const low = @intCast(u16, codepoint & 0x3FF) + 0xDC00;
598-
utf16le[dest_i] = mem.nativeToLittle(u16, high);
599-
utf16le[dest_i + 1] = mem.nativeToLittle(u16, low);
600-
dest_i += 2;
601-
}
602-
src_i = next_src_i;
603-
},
604-
else => return error.InvalidUtf8,
579+
const n = utf8ByteSequenceLength(utf8[src_i]) catch return error.InvalidUtf8;
580+
const next_src_i = src_i + n;
581+
const codepoint = utf8Decode(utf8[src_i..next_src_i]) catch return error.InvalidUtf8;
582+
if (codepoint < 0x10000) {
583+
const short = @intCast(u16, codepoint);
584+
utf16le[dest_i] = mem.nativeToLittle(u16, short);
585+
dest_i += 1;
586+
} else {
587+
const high = @intCast(u16, (codepoint - 0x10000) >> 10) + 0xD800;
588+
const low = @intCast(u16, codepoint & 0x3FF) + 0xDC00;
589+
utf16le[dest_i] = mem.nativeToLittle(u16, high);
590+
utf16le[dest_i + 1] = mem.nativeToLittle(u16, low);
591+
dest_i += 2;
605592
}
593+
src_i = next_src_i;
606594
}
607595
return dest_i;
608596
}

0 commit comments

Comments
 (0)