@@ -6,7 +6,7 @@ const mem = std.mem;
6
6
7
7
/// Returns how many bytes the UTF-8 representation would require
8
8
/// for the given codepoint.
9
- pub fn utf8CodepointSequenceLength (c : u32 ) ! u3 {
9
+ pub fn utf8CodepointSequenceLength (c : u21 ) ! u3 {
10
10
if (c < 0x80 ) return @as (u3 , 1 );
11
11
if (c < 0x800 ) return @as (u3 , 2 );
12
12
if (c < 0x10000 ) return @as (u3 , 3 );
@@ -32,7 +32,7 @@ pub fn utf8ByteSequenceLength(first_byte: u8) !u3 {
32
32
/// out: the out buffer to write to. Must have a len >= utf8CodepointSequenceLength(c).
33
33
/// Errors: if c cannot be encoded in UTF-8.
34
34
/// Returns: the number of bytes written to out.
35
- pub fn utf8Encode (c : u32 , out : []u8 ) ! u3 {
35
+ pub fn utf8Encode (c : u21 , out : []u8 ) ! u3 {
36
36
const length = try utf8CodepointSequenceLength (c );
37
37
assert (out .len >= length );
38
38
switch (length ) {
@@ -68,9 +68,9 @@ const Utf8DecodeError = Utf8Decode2Error || Utf8Decode3Error || Utf8Decode4Error
68
68
/// bytes.len must be equal to utf8ByteSequenceLength(bytes[0]) catch unreachable.
69
69
/// If you already know the length at comptime, you can call one of
70
70
/// utf8Decode2,utf8Decode3,utf8Decode4 directly instead of this function.
71
- pub fn utf8Decode (bytes : []const u8 ) Utf8DecodeError ! u32 {
71
+ pub fn utf8Decode (bytes : []const u8 ) Utf8DecodeError ! u21 {
72
72
return switch (bytes .len ) {
73
- 1 = > @as (u32 , bytes [0 ]),
73
+ 1 = > @as (u21 , bytes [0 ]),
74
74
2 = > utf8Decode2 (bytes ),
75
75
3 = > utf8Decode3 (bytes ),
76
76
4 = > utf8Decode4 (bytes ),
@@ -82,10 +82,10 @@ const Utf8Decode2Error = error{
82
82
Utf8ExpectedContinuation ,
83
83
Utf8OverlongEncoding ,
84
84
};
85
- pub fn utf8Decode2 (bytes : []const u8 ) Utf8Decode2Error ! u32 {
85
+ pub fn utf8Decode2 (bytes : []const u8 ) Utf8Decode2Error ! u21 {
86
86
assert (bytes .len == 2 );
87
87
assert (bytes [0 ] & 0b11100000 == 0b11000000 );
88
- var value : u32 = bytes [0 ] & 0b00011111 ;
88
+ var value : u21 = bytes [0 ] & 0b00011111 ;
89
89
90
90
if (bytes [1 ] & 0b11000000 != 0b10000000 ) return error .Utf8ExpectedContinuation ;
91
91
value <<= 6 ;
@@ -101,10 +101,10 @@ const Utf8Decode3Error = error{
101
101
Utf8OverlongEncoding ,
102
102
Utf8EncodesSurrogateHalf ,
103
103
};
104
- pub fn utf8Decode3 (bytes : []const u8 ) Utf8Decode3Error ! u32 {
104
+ pub fn utf8Decode3 (bytes : []const u8 ) Utf8Decode3Error ! u21 {
105
105
assert (bytes .len == 3 );
106
106
assert (bytes [0 ] & 0b11110000 == 0b11100000 );
107
- var value : u32 = bytes [0 ] & 0b00001111 ;
107
+ var value : u21 = bytes [0 ] & 0b00001111 ;
108
108
109
109
if (bytes [1 ] & 0b11000000 != 0b10000000 ) return error .Utf8ExpectedContinuation ;
110
110
value <<= 6 ;
@@ -125,10 +125,10 @@ const Utf8Decode4Error = error{
125
125
Utf8OverlongEncoding ,
126
126
Utf8CodepointTooLarge ,
127
127
};
128
- pub fn utf8Decode4 (bytes : []const u8 ) Utf8Decode4Error ! u32 {
128
+ pub fn utf8Decode4 (bytes : []const u8 ) Utf8Decode4Error ! u21 {
129
129
assert (bytes .len == 4 );
130
130
assert (bytes [0 ] & 0b11111000 == 0b11110000 );
131
- var value : u32 = bytes [0 ] & 0b00000111 ;
131
+ var value : u21 = bytes [0 ] & 0b00000111 ;
132
132
133
133
if (bytes [1 ] & 0b11000000 != 0b10000000 ) return error .Utf8ExpectedContinuation ;
134
134
value <<= 6 ;
@@ -224,11 +224,11 @@ pub const Utf8Iterator = struct {
224
224
return it .bytes [it .i - cp_len .. it .i ];
225
225
}
226
226
227
- pub fn nextCodepoint (it : * Utf8Iterator ) ? u32 {
227
+ pub fn nextCodepoint (it : * Utf8Iterator ) ? u21 {
228
228
const slice = it .nextCodepointSlice () orelse return null ;
229
229
230
230
switch (slice .len ) {
231
- 1 = > return @as (u32 , slice [0 ]),
231
+ 1 = > return @as (u21 , slice [0 ]),
232
232
2 = > return utf8Decode2 (slice ) catch unreachable ,
233
233
3 = > return utf8Decode3 (slice ) catch unreachable ,
234
234
4 = > return utf8Decode4 (slice ) catch unreachable ,
@@ -248,19 +248,19 @@ pub const Utf16LeIterator = struct {
248
248
};
249
249
}
250
250
251
- pub fn nextCodepoint (it : * Utf16LeIterator ) ! ? u32 {
251
+ pub fn nextCodepoint (it : * Utf16LeIterator ) ! ? u21 {
252
252
assert (it .i <= it .bytes .len );
253
253
if (it .i == it .bytes .len ) return null ;
254
- const c0 : u32 = mem .readIntSliceLittle (u16 , it .bytes [it .i .. it .i + 2 ]);
255
- if (c0 & ~ @as (u32 , 0x03ff ) == 0xd800 ) {
254
+ const c0 : u21 = mem .readIntSliceLittle (u16 , it .bytes [it .i .. it .i + 2 ]);
255
+ if (c0 & ~ @as (u21 , 0x03ff ) == 0xd800 ) {
256
256
// surrogate pair
257
257
it .i += 2 ;
258
258
if (it .i >= it .bytes .len ) return error .DanglingSurrogateHalf ;
259
- const c1 : u32 = mem .readIntSliceLittle (u16 , it .bytes [it .i .. it .i + 2 ]);
260
- if (c1 & ~ @as (u32 , 0x03ff ) != 0xdc00 ) return error .ExpectedSecondSurrogateHalf ;
259
+ const c1 : u21 = mem .readIntSliceLittle (u16 , it .bytes [it .i .. it .i + 2 ]);
260
+ if (c1 & ~ @as (u21 , 0x03ff ) != 0xdc00 ) return error .ExpectedSecondSurrogateHalf ;
261
261
it .i += 2 ;
262
262
return 0x10000 + (((c0 & 0x03ff ) << 10 ) | (c1 & 0x03ff ));
263
- } else if (c0 & ~ @as (u32 , 0x03ff ) == 0xdc00 ) {
263
+ } else if (c0 & ~ @as (u21 , 0x03ff ) == 0xdc00 ) {
264
264
return error .UnexpectedSecondSurrogateHalf ;
265
265
} else {
266
266
it .i += 2 ;
@@ -304,10 +304,10 @@ fn testUtf8EncodeError() void {
304
304
testErrorEncode (0xd800 , array [0.. ], error .Utf8CannotEncodeSurrogateHalf );
305
305
testErrorEncode (0xdfff , array [0.. ], error .Utf8CannotEncodeSurrogateHalf );
306
306
testErrorEncode (0x110000 , array [0.. ], error .CodepointTooLarge );
307
- testErrorEncode (0xffffffff , array [0.. ], error .CodepointTooLarge );
307
+ testErrorEncode (0x1fffff , array [0.. ], error .CodepointTooLarge );
308
308
}
309
309
310
- fn testErrorEncode (codePoint : u32 , array : []u8 , expectedErr : anyerror ) void {
310
+ fn testErrorEncode (codePoint : u21 , array : []u8 , expectedErr : anyerror ) void {
311
311
// Test helper: passes the result of utf8Encode(codePoint, array) to
// testing.expectError together with expectedErr. The change shown here
// narrows codePoint from u32 to u21, matching utf8Encode's new parameter type.
testing .expectError (expectedErr , utf8Encode (codePoint , array ));
312
312
}
313
313
@@ -455,11 +455,11 @@ fn testError(bytes: []const u8, expected_err: anyerror) void {
455
455
testing .expectError (expected_err , testDecode (bytes ));
456
456
}
457
457
458
- fn testValid (bytes : []const u8 , expected_codepoint : u32 ) void {
458
+ fn testValid (bytes : []const u8 , expected_codepoint : u21 ) void {
459
459
// Test helper: decodes bytes with testDecode and expects the result to
// equal expected_codepoint. A decode error is treated as unreachable, so
// callers should only pass byte sequences that are expected to decode.
testing .expect ((testDecode (bytes ) catch unreachable ) == expected_codepoint );
460
460
}
461
461
462
- fn testDecode (bytes : []const u8 ) ! u32 {
462
+ fn testDecode (bytes : []const u8 ) ! u21 {
463
463
const length = try utf8ByteSequenceLength (bytes [0 ]);
464
464
if (bytes .len < length ) return error .UnexpectedEof ;
465
465
testing .expect (bytes .len == length );
0 commit comments