Skip to content

Commit d8e7eda

Browse files
committed
std.zig.fmtId: conditionally escape primitives/_
This is a breaking change.

This updates `std.zig.fmtId` to support conditionally escaping primitives and the reserved `_` identifier via format specifiers:

- `{}`: escape invalid identifiers, identifiers that shadow primitives and the reserved `_` identifier.
- `{p}`: same as `{}`, but don't escape identifiers that shadow primitives.
- `{_}`: same as `{}`, but don't escape the reserved `_` identifier.
- `{p_}` or `{_p}`: only escape invalid identifiers.

(The idea is that `p`/`_` mean "allow primitives/underscores".)

Any other format specifiers will result in compile errors.

Additionally, `isValidId` now considers `_` a valid identifier. If this distinction is important, consider combining existing uses of this function with the new `isUnderscore` function.
1 parent 129de47 commit d8e7eda

File tree

1 file changed

+86
-11
lines changed

1 file changed

+86
-11
lines changed

lib/std/zig.zig

Lines changed: 86 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ pub const Tokenizer = tokenizer.Tokenizer;
1010
pub const string_literal = @import("zig/string_literal.zig");
1111
pub const number_literal = @import("zig/number_literal.zig");
1212
pub const primitives = @import("zig/primitives.zig");
13+
pub const isPrimitive = primitives.isPrimitive;
1314
pub const Ast = @import("zig/Ast.zig");
1415
pub const AstGen = @import("zig/AstGen.zig");
1516
pub const Zir = @import("zig/Zir.zig");
@@ -728,20 +729,87 @@ const tokenizer = @import("zig/tokenizer.zig");
728729
const assert = std.debug.assert;
729730
const Allocator = std.mem.Allocator;
730731

731-
/// Return a Formatter for a Zig identifier
732+
/// Return a Formatter for a Zig identifier, escaping it with `@""` syntax if needed.
733+
///
734+
/// - An empty `{}` format specifier escapes invalid identifiers, identifiers that shadow primitives
735+
/// and the reserved `_` identifier.
736+
/// - Add `p` to the specifier to render identifiers that shadow primitives unescaped.
737+
/// - Add `_` to the specifier to render the reserved `_` identifier unescaped.
738+
/// - `p` and `_` can be combined, e.g. `{p_}`.
739+
///
732740
pub fn fmtId(bytes: []const u8) std.fmt.Formatter(formatId) {
733741
return .{ .data = bytes };
734742
}
735743

736-
/// Print the string as a Zig identifier escaping it with @"" syntax if needed.
744+
test fmtId {
745+
const expectFmt = std.testing.expectFmt;
746+
try expectFmt("@\"while\"", "{}", .{fmtId("while")});
747+
try expectFmt("@\"while\"", "{p}", .{fmtId("while")});
748+
try expectFmt("@\"while\"", "{_}", .{fmtId("while")});
749+
try expectFmt("@\"while\"", "{p_}", .{fmtId("while")});
750+
try expectFmt("@\"while\"", "{_p}", .{fmtId("while")});
751+
752+
try expectFmt("hello", "{}", .{fmtId("hello")});
753+
try expectFmt("hello", "{p}", .{fmtId("hello")});
754+
try expectFmt("hello", "{_}", .{fmtId("hello")});
755+
try expectFmt("hello", "{p_}", .{fmtId("hello")});
756+
try expectFmt("hello", "{_p}", .{fmtId("hello")});
757+
758+
try expectFmt("@\"type\"", "{}", .{fmtId("type")});
759+
try expectFmt("type", "{p}", .{fmtId("type")});
760+
try expectFmt("@\"type\"", "{_}", .{fmtId("type")});
761+
try expectFmt("type", "{p_}", .{fmtId("type")});
762+
try expectFmt("type", "{_p}", .{fmtId("type")});
763+
764+
try expectFmt("@\"_\"", "{}", .{fmtId("_")});
765+
try expectFmt("@\"_\"", "{p}", .{fmtId("_")});
766+
try expectFmt("_", "{_}", .{fmtId("_")});
767+
try expectFmt("_", "{p_}", .{fmtId("_")});
768+
try expectFmt("_", "{_p}", .{fmtId("_")});
769+
770+
try expectFmt("@\"i123\"", "{}", .{fmtId("i123")});
771+
try expectFmt("i123", "{p}", .{fmtId("i123")});
772+
try expectFmt("@\"4four\"", "{}", .{fmtId("4four")});
773+
try expectFmt("_underscore", "{}", .{fmtId("_underscore")});
774+
try expectFmt("@\"11\\\"23\"", "{}", .{fmtId("11\"23")});
775+
try expectFmt("@\"11\\x0f23\"", "{}", .{fmtId("11\x0F23")});
776+
777+
// These are technically not currently legal in Zig.
778+
try expectFmt("@\"\"", "{}", .{fmtId("")});
779+
try expectFmt("@\"\\x00\"", "{}", .{fmtId("\x00")});
780+
}
781+
782+
/// Print the string as a Zig identifier, escaping it with `@""` syntax if needed.
737783
fn formatId(
738784
bytes: []const u8,
739-
comptime unused_format_string: []const u8,
785+
comptime fmt: []const u8,
740786
options: std.fmt.FormatOptions,
741787
writer: anytype,
742788
) !void {
743-
_ = unused_format_string;
744-
if (isValidId(bytes)) {
789+
const allow_primitive, const allow_underscore = comptime parse_fmt: {
790+
var allow_primitive = false;
791+
var allow_underscore = false;
792+
for (fmt) |char| {
793+
switch (char) {
794+
'p' => if (!allow_primitive) {
795+
allow_primitive = true;
796+
continue;
797+
},
798+
'_' => if (!allow_underscore) {
799+
allow_underscore = true;
800+
continue;
801+
},
802+
else => {},
803+
}
804+
@compileError("expected {}, {p}, {_}, {p_} or {_p}, found {" ++ fmt ++ "}");
805+
}
806+
break :parse_fmt .{ allow_primitive, allow_underscore };
807+
};
808+
809+
if (isValidId(bytes) and
810+
(allow_primitive or !std.zig.isPrimitive(bytes)) and
811+
(allow_underscore or !isUnderscore(bytes)))
812+
{
745813
return writer.writeAll(bytes);
746814
}
747815
try writer.writeAll("@\"");
@@ -757,12 +825,8 @@ pub fn fmtEscapes(bytes: []const u8) std.fmt.Formatter(stringEscape) {
757825
return .{ .data = bytes };
758826
}
759827

760-
test "escape invalid identifiers" {
828+
test fmtEscapes {
761829
const expectFmt = std.testing.expectFmt;
762-
try expectFmt("@\"while\"", "{}", .{fmtId("while")});
763-
try expectFmt("hello", "{}", .{fmtId("hello")});
764-
try expectFmt("@\"11\\\"23\"", "{}", .{fmtId("11\"23")});
765-
try expectFmt("@\"11\\x0f23\"", "{}", .{fmtId("11\x0F23")});
766830
try expectFmt("\\x0f", "{}", .{fmtEscapes("\x0f")});
767831
try expectFmt(
768832
\\" \\ hi \x07 \x11 " derp \'"
@@ -816,7 +880,6 @@ pub fn stringEscape(
816880

817881
pub fn isValidId(bytes: []const u8) bool {
818882
if (bytes.len == 0) return false;
819-
if (std.mem.eql(u8, bytes, "_")) return false;
820883
for (bytes, 0..) |c, i| {
821884
switch (c) {
822885
'_', 'a'...'z', 'A'...'Z' => {},
@@ -836,6 +899,18 @@ test isValidId {
836899
try std.testing.expect(isValidId("i386"));
837900
}
838901

902+
pub fn isUnderscore(bytes: []const u8) bool {
903+
return bytes.len == 1 and bytes[0] == '_';
904+
}
905+
906+
test isUnderscore {
907+
try std.testing.expect(isUnderscore("_"));
908+
try std.testing.expect(!isUnderscore("__"));
909+
try std.testing.expect(!isUnderscore("_foo"));
910+
try std.testing.expect(isUnderscore("\x5f"));
911+
try std.testing.expect(!isUnderscore("\\x5f"));
912+
}
913+
839914
pub fn readSourceFileToEndAlloc(
840915
allocator: Allocator,
841916
input: std.fs.File,

0 commit comments

Comments
 (0)