Skip to content

Commit 08e5daa

Browse files
marler8997 authored and Vexu committed
Add std.unicode.fmtUtf16le
1 parent ee173d5 commit 08e5daa

File tree

1 file changed

+43
-2
lines changed

1 file changed

+43
-2
lines changed

lib/std/unicode.zig

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -317,9 +317,9 @@ pub const Utf16LeIterator = struct {
317317
assert(it.i <= it.bytes.len);
318318
if (it.i == it.bytes.len) return null;
319319
const c0: u21 = mem.readIntLittle(u16, it.bytes[it.i..][0..2]);
320+
it.i += 2;
320321
if (c0 & ~@as(u21, 0x03ff) == 0xd800) {
321322
// surrogate pair
322-
it.i += 2;
323323
if (it.i >= it.bytes.len) return error.DanglingSurrogateHalf;
324324
const c1: u21 = mem.readIntLittle(u16, it.bytes[it.i..][0..2]);
325325
if (c1 & ~@as(u21, 0x03ff) != 0xdc00) return error.ExpectedSecondSurrogateHalf;
@@ -328,7 +328,6 @@ pub const Utf16LeIterator = struct {
328328
} else if (c0 & ~@as(u21, 0x03ff) == 0xdc00) {
329329
return error.UnexpectedSecondSurrogateHalf;
330330
} else {
331-
it.i += 2;
332331
return c0;
333332
}
334333
}
@@ -769,6 +768,48 @@ fn calcUtf16LeLen(utf8: []const u8) usize {
769768
return dest_len;
770769
}
771770

771+
/// Format callback that writes the `utf16le` string to `writer` as UTF-8.
///
/// Any code unit sequence the iterator rejects (for example an unpaired
/// surrogate half) is rendered as U+FFFD instead of aborting the print.
/// `fmt` and `options` are accepted to satisfy the formatter signature but
/// are ignored. The only errors returned are those produced by `writer`.
fn formatUtf16le(
    utf16le: []const u16,
    comptime fmt: []const u8,
    options: std.fmt.FormatOptions,
    writer: anytype,
) !void {
    _ = fmt;
    _ = options;

    // U+FFFD REPLACEMENT CHARACTER, substituted for anything undecodable.
    const replacement: u21 = 0xfffd;
    // Longest UTF-8 sequence a single codepoint can occupy.
    const max_sequence_len = 4;

    // Output is staged in a small stack buffer so the writer is not called
    // once per codepoint; the exact size is arbitrary.
    var scratch: [300]u8 = undefined;
    var used: usize = 0;

    var units = Utf16LeIterator.init(utf16le);
    while (true) {
        // A decode error is turned into a replacement character; `null`
        // means the input is exhausted.
        const maybe_codepoint: ?u21 = units.nextCodepoint() catch replacement;
        const codepoint = maybe_codepoint orelse break;

        // If the codepoint itself cannot be encoded, fall back to the
        // replacement character, which is always encodable.
        const written = utf8Encode(codepoint, scratch[used..]) catch
            (utf8Encode(replacement, scratch[used..]) catch unreachable);
        used += written;

        // Flush as soon as there is no longer room for a maximum-length
        // sequence, so the next encode can never run out of space.
        if (scratch.len - used < max_sequence_len) {
            try writer.writeAll(scratch[0..used]);
            used = 0;
        }
    }

    // Emit whatever is still staged (possibly nothing).
    try writer.writeAll(scratch[0..used]);
}
794+
795+
/// Wrap a UTF-16LE string in a `std.fmt.Formatter` so that it can be passed
/// as a format argument (e.g. with `"{}"`) and printed via `formatUtf16le`.
/// The returned value holds the `utf16le` slice itself, not a copy.
pub fn fmtUtf16le(utf16le: []const u16) std.fmt.Formatter(formatUtf16le) {
    const Utf16leFormatter = std.fmt.Formatter(formatUtf16le);
    return Utf16leFormatter{ .data = utf16le };
}
799+
800+
// Checks that `fmtUtf16le` prints valid UTF-16LE as UTF-8 and replaces lone
// surrogate halves with U+FFFD.
//
// The single-unit inputs are built with `readIntNative` from the two bytes in
// little-endian order, so the in-memory representation is the same UTF-16LE
// byte pair regardless of host endianness.
//
// Expected values at the surrogate boundaries are written as `\u{...}`
// escapes rather than raw glyphs: U+D7FF is unassigned and U+E000 is a
// private-use codepoint, and both are easily lost or rendered invisibly by
// editors and web views, which would silently change what the test asserts.
test "fmtUtf16le" {
    const expectFmt = std.testing.expectFmt;

    // Empty input, plain ASCII, and a codepoint that needs a surrogate pair.
    try expectFmt("", "{}", .{fmtUtf16le(utf8ToUtf16LeStringLiteral(""))});
    try expectFmt("foo", "{}", .{fmtUtf16le(utf8ToUtf16LeStringLiteral("foo"))});
    try expectFmt("𐐷", "{}", .{fmtUtf16le(utf8ToUtf16LeStringLiteral("𐐷"))});

    // 0xD7FF: last code unit below the surrogate range, printed as itself.
    try expectFmt("\u{d7ff}", "{}", .{fmtUtf16le(&[_]u16{std.mem.readIntNative(u16, "\xff\xd7")})});

    // 0xD800 and 0xDBFF: lone first surrogate halves, replaced by U+FFFD.
    try expectFmt("\u{fffd}", "{}", .{fmtUtf16le(&[_]u16{std.mem.readIntNative(u16, "\x00\xd8")})});
    try expectFmt("\u{fffd}", "{}", .{fmtUtf16le(&[_]u16{std.mem.readIntNative(u16, "\xff\xdb")})});

    // 0xDC00 and 0xDFFF: lone second surrogate halves, replaced by U+FFFD.
    try expectFmt("\u{fffd}", "{}", .{fmtUtf16le(&[_]u16{std.mem.readIntNative(u16, "\x00\xdc")})});
    try expectFmt("\u{fffd}", "{}", .{fmtUtf16le(&[_]u16{std.mem.readIntNative(u16, "\xff\xdf")})});

    // 0xE000: first code unit above the surrogate range, printed as itself.
    try expectFmt("\u{e000}", "{}", .{fmtUtf16le(&[_]u16{std.mem.readIntNative(u16, "\x00\xe0")})});
}
812+
772813
test "utf8ToUtf16LeStringLiteral" {
773814
{
774815
const bytes = [_:0]u16{

0 commit comments

Comments
 (0)