Skip to content

Commit 4f75f10

Browse files
committed
Unicode (utf-8) rework
* Make the errors clearer. (The return error types are left unset because setting them reveals bugs in stage1.)
* Make decoding and length computation the same operation.
* Report the location of the invalid character.
* Always report errors in Utf8View.
1 parent 499df96 commit 4f75f10

File tree

3 files changed

+194
-181
lines changed

3 files changed

+194
-181
lines changed

lib/std/json.zig

+7-6
Original file line numberDiff line numberDiff line change
@@ -2099,7 +2099,7 @@ fn unescapeString(output: []u8, input: []const u8) !void {
20992099
inIndex += 6;
21002100
} else |err| {
21012101
// it might be a surrogate pair
2102-
if (err != error.Utf8CannotEncodeSurrogateHalf) {
2102+
if (err != error.UnicodeSurrogateHalf) {
21032103
return error.InvalidUnicodeHexSymbol;
21042104
}
21052105
// check if a second code unit is present
@@ -2532,15 +2532,16 @@ pub fn stringify(
25322532
'\r' => try out_stream.writeAll("\\r"),
25332533
'\t' => try out_stream.writeAll("\\t"),
25342534
else => {
2535-
const ulen = std.unicode.utf8ByteSequenceLength(value[i]) catch unreachable;
25362535
// control characters (only things left with 1 byte length) should always be printed as unicode escapes
2537-
if (ulen == 1 or options.string.String.escape_unicode) {
2538-
const codepoint = std.unicode.utf8Decode(value[i .. i + ulen]) catch unreachable;
2539-
try outputUnicodeEscape(codepoint, out_stream);
2536+
if ((value[i] < 128) or options.string.String.escape_unicode) {
2537+
const c = std.unicode.utf8Decode(value[i..]) catch unreachable;
2538+
try outputUnicodeEscape(c.codepoint, out_stream);
2539+
i += c.utf8len - 1;
25402540
} else {
2541+
var ulen = std.unicode.utf8ByteSequenceLength(value[i]) catch unreachable;
25412542
try out_stream.writeAll(value[i .. i + ulen]);
2543+
i += ulen - 1;
25422544
}
2543-
i += ulen - 1;
25442545
},
25452546
}
25462547
}

lib/std/process.zig

+1-1
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ pub const GetEnvVarOwnedError = error{
156156
pub fn getEnvVarOwned(allocator: *mem.Allocator, key: []const u8) GetEnvVarOwnedError![]u8 {
157157
if (builtin.os.tag == .windows) {
158158
const result_w = blk: {
159-
const key_w = try std.unicode.utf8ToUtf16LeWithNull(allocator, key);
159+
const key_w = std.unicode.utf8ToUtf16LeWithNull(allocator, key) catch return error.InvalidUtf8;
160160
defer allocator.free(key_w);
161161

162162
break :blk std.os.getenvW(key_w) orelse return error.EnvironmentVariableNotFound;

0 commit comments

Comments
 (0)