Skip to content

Commit 5843a6e

Browse files
committed
std: optimise utf8ByteSequenceLength
Also tested (but not as fast):

```zig
pub fn utf8ByteSequenceLength(first_byte: u8) !u3 {
    const len = @clz(u8, ~first_byte);
    if (len == 0) return 1;
    if (len < 4) return @intCast(u3, len);
    return error.Utf8InvalidStartByte;
}
```
1 parent 8b72eed commit 5843a6e

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

lib/std/unicode.zig

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,13 @@ pub fn utf8CodepointSequenceLength(c: u32) !u3 {
1818
/// returns a number 1-4 indicating the total length of the codepoint in bytes.
1919
/// If this byte does not match the form of a UTF-8 start byte, returns Utf8InvalidStartByte.
2020
pub fn utf8ByteSequenceLength(first_byte: u8) !u3 {
21-
if (first_byte < 0b10000000) return @as(u3, 1);
22-
if (first_byte & 0b11100000 == 0b11000000) return @as(u3, 2);
23-
if (first_byte & 0b11110000 == 0b11100000) return @as(u3, 3);
24-
if (first_byte & 0b11111000 == 0b11110000) return @as(u3, 4);
25-
return error.Utf8InvalidStartByte;
21+
return switch (@clz(u8, ~first_byte)) {
22+
0 => 1,
23+
2 => 2,
24+
3 => 3,
25+
4 => 4,
26+
else => error.Utf8InvalidStartByte,
27+
};
2628
}
2729

2830
/// Encodes the given codepoint into a UTF-8 byte sequence.

0 commit comments

Comments
 (0)