@@ -7,6 +7,7 @@ const std = @import("std.zig");
7
7
const math = std .math ;
8
8
const assert = std .debug .assert ;
9
9
const mem = std .mem ;
10
+ const unicode = std .unicode ;
10
11
const builtin = @import ("builtin" );
11
12
const errol = @import ("fmt/errol.zig" );
12
13
const lossyCast = std .math .lossyCast ;
@@ -76,6 +77,7 @@ fn peekIsAlign(comptime fmt: []const u8) bool {
76
77
/// - `b`: output integer value in binary notation
77
78
/// - `o`: output integer value in octal notation
78
79
/// - `c`: output integer as an ASCII character. Integer type must have 8 bits at max.
80
+ /// - `u`: output integer as a UTF-8 sequence. Integer type must have 21 bits at max.
79
81
/// - `*`: output the address of the value instead of the value itself.
80
82
///
81
83
/// If a formatted user type contains a function of the type
@@ -555,6 +557,12 @@ pub fn formatIntValue(
555
557
} else {
556
558
@compileError ("Cannot escape character with more than 8 bits" );
557
559
}
560
+ } else if (comptime std .mem .eql (u8 , fmt , "u" )) {
561
+ if (@typeInfo (@TypeOf (int_value )).Int .bits <= 21 ) {
562
+ return formatUnicodeCodepoint (@as (u21 , int_value ), options , writer );
563
+ } else {
564
+ @compileError ("Cannot print integer that is larger than 21 bits as an UTF-8 sequence" );
565
+ }
558
566
} else if (comptime std .mem .eql (u8 , fmt , "b" )) {
559
567
radix = 2 ;
560
568
uppercase = false ;
@@ -641,30 +649,54 @@ pub fn formatAsciiChar(
641
649
return writer .writeAll (@as (* const [1 ]u8 , & c ));
642
650
}
643
651
652
/// Writes the codepoint `c` to `writer` as its UTF-8 byte sequence.
///
/// The encoded bytes are handed to `formatBuf`, so `options` is applied there.
/// If `utf8Encode` rejects `c` (surrogate half or codepoint too large), the
/// replacement character U+FFFD is written instead of returning the error.
pub fn formatUnicodeCodepoint(
    c: u21,
    options: FormatOptions,
    writer: anytype,
) !void {
    // UTF-8 encoding of U+FFFD, emitted when `c` cannot be encoded.
    const replacement = [_]u8{ 0xef, 0xbf, 0xbd };

    var encoded: [4]u8 = undefined;
    if (std.unicode.utf8Encode(c, &encoded)) |n| {
        return formatBuf(encoded[0..n], options, writer);
    } else |err| switch (err) {
        error.Utf8CannotEncodeSurrogateHalf, error.CodepointTooLarge => {
            return formatBuf(&replacement, options, writer);
        },
    }
}
666
+
644
667
/// Writes `buf` to `writer`, padded with `options.fill` up to `options.width`.
///
/// The width of `buf` is measured in UTF-8 codepoints rather than bytes; if
/// `utf8CountCodepoints` returns an error, the byte length is used instead.
/// When `options.width` is null, or `buf` is already at least that wide,
/// `buf` is written unchanged. Otherwise the fill bytes are placed according
/// to `options.alignment`; for `.Center` an odd leftover fill byte goes on
/// the right-hand side.
///
/// Any error returned by `writer` is propagated to the caller.
pub fn formatBuf(
    buf: []const u8,
    options: FormatOptions,
    writer: anytype,
) !void {
    // Fast path: without a minimum width there is no need to count codepoints.
    const min_width = options.width orelse return writer.writeAll(buf);

    // In case of error assume the buffer content is ASCII-encoded,
    // i.e. one column per byte.
    const width = unicode.utf8CountCodepoints(buf) catch buf.len;
    if (width >= min_width) return writer.writeAll(buf);

    const padding = min_width - width;
    switch (options.alignment) {
        .Left => {
            try writer.writeAll(buf);
            try writer.writeByteNTimes(options.fill, padding);
        },
        .Center => {
            const left_padding = padding / 2;
            const right_padding = (padding + 1) / 2;
            try writer.writeByteNTimes(options.fill, left_padding);
            try writer.writeAll(buf);
            try writer.writeByteNTimes(options.fill, right_padding);
        },
        .Right => {
            try writer.writeByteNTimes(options.fill, padding);
            try writer.writeAll(buf);
        },
    }
}
670
702
@@ -1385,6 +1417,22 @@ test "int.specifier" {
1385
1417
const value : u16 = 0o1234 ;
1386
1418
try testFmt ("u16: 0o1234\n " , "u16: 0o{o}\n " , .{value });
1387
1419
}
1420
+ {
1421
+ const value : u8 = 'a' ;
1422
+ try testFmt ("UTF-8: a\n " , "UTF-8: {u}\n " , .{value });
1423
+ }
1424
+ {
1425
+ const value : u21 = 0x1F310 ;
1426
+ try testFmt ("UTF-8: 🌐\n " , "UTF-8: {u}\n " , .{value });
1427
+ }
1428
+ {
1429
+ const value : u21 = 0xD800 ;
1430
+ try testFmt ("UTF-8: �\n " , "UTF-8: {u}\n " , .{value });
1431
+ }
1432
+ {
1433
+ const value : u21 = 0x110001 ;
1434
+ try testFmt ("UTF-8: �\n " , "UTF-8: {u}\n " , .{value });
1435
+ }
1388
1436
}
1389
1437
1390
1438
test "int.padded" {
@@ -1400,6 +1448,10 @@ test "int.padded" {
1400
1448
try testFmt ("i16: '-12345'" , "i16: '{:4}'" , .{@as (i16 , -12345 )});
1401
1449
try testFmt ("i16: '+12345'" , "i16: '{:4}'" , .{@as (i16 , 12345 )});
1402
1450
try testFmt ("u16: '12345'" , "u16: '{:4}'" , .{@as (u16 , 12345 )});
1451
+
1452
+ try testFmt ("UTF-8: 'ü '" , "UTF-8: '{u:<4}'" , .{'ü' });
1453
+ try testFmt ("UTF-8: ' ü'" , "UTF-8: '{u:>4}'" , .{'ü' });
1454
+ try testFmt ("UTF-8: ' ü '" , "UTF-8: '{u:^4}'" , .{'ü' });
1403
1455
}
1404
1456
1405
1457
test "buffer" {
@@ -1929,6 +1981,9 @@ test "padding" {
1929
1981
try testFmt ("==================Filled" , "{:=>24}" , .{"Filled" });
1930
1982
try testFmt (" Centered " , "{:^24}" , .{"Centered" });
1931
1983
try testFmt ("-" , "{:-^1}" , .{"" });
1984
+ try testFmt ("==crêpe===" , "{:=^10}" , .{"crêpe" });
1985
+ try testFmt ("=====crêpe" , "{:=>10}" , .{"crêpe" });
1986
+ try testFmt ("crêpe=====" , "{:=<10}" , .{"crêpe" });
1932
1987
}
1933
1988
1934
1989
test "decimal float padding" {
0 commit comments