Skip to content

Commit 6f8e8e0

Browse files
committed
fix(perf): remove LUT
This change stops using a LUT (look-up table):

* The code is much simpler and easier to understand now.
* Using a LUT means we rely on a warm cache. Relying on the cache like this results in inconsistent performance, and in many cases the generated code will be worse. Also, as @topolarity once pointed out, in some cases where it seems like the code may branch, it actually doesn't: #11629 (comment)
* Other languages' standard libraries don't do this either. Just for fun, I wanted to see what code other languages generate compared to ours now: https://rust.godbolt.org/z/Te4ax9Edf, https://zig.godbolt.org/z/nTbYedWKv

So we are now pretty much on par with or better than other languages.
1 parent 3a6b499 commit 6f8e8e0

File tree

1 file changed

+35
-151
lines changed

1 file changed

+35
-151
lines changed

lib/std/ascii.zig

Lines changed: 35 additions & 151 deletions
Original file line numberDiff line numberDiff line change
@@ -88,152 +88,20 @@ pub const control_code = struct {
8888
pub const xoff = dc3;
8989
};
9090

91-
const tIndex = enum(u3) {
92-
Alpha,
93-
Hex,
94-
Space,
95-
Digit,
96-
Lower,
97-
Upper,
98-
// Ctrl, < 0x20 || == DEL
99-
// Print, = Graph || == ' '. NOT '\t' et cetera
100-
Punct,
101-
Graph,
102-
//ASCII, | ~0b01111111
103-
//isBlank, == ' ' || == '\x09'
104-
};
105-
106-
const combinedTable = init: {
107-
comptime var table: [256]u8 = undefined;
108-
109-
const mem = std.mem;
110-
111-
const alpha = [_]u1{
112-
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
113-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
114-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
115-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
116-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
117-
118-
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
119-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
120-
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
121-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
122-
};
123-
const lower = [_]u1{
124-
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
125-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
126-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
127-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
128-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
129-
130-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
131-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
132-
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
134-
};
135-
const upper = [_]u1{
136-
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
137-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
138-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
139-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
140-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
141-
142-
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
144-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
145-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
146-
};
147-
const digit = [_]u1{
148-
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
149-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
150-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
151-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
152-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
153-
154-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
155-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
156-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
157-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
158-
};
159-
const hex = [_]u1{
160-
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
161-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
162-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
163-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
164-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
165-
166-
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
167-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
168-
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
169-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
170-
};
171-
const space = [_]u1{
172-
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
173-
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
174-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175-
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177-
178-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
182-
};
183-
const punct = [_]u1{
184-
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
185-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
186-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
187-
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
188-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
189-
190-
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
191-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
192-
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
193-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
194-
};
195-
const graph = [_]u1{
196-
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
197-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
198-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
199-
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
200-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
201-
202-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
203-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
204-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
205-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
206-
};
207-
208-
comptime var i = 0;
209-
inline while (i < 128) : (i += 1) {
210-
table[i] =
211-
@as(u8, alpha[i]) << @enumToInt(tIndex.Alpha) |
212-
@as(u8, hex[i]) << @enumToInt(tIndex.Hex) |
213-
@as(u8, space[i]) << @enumToInt(tIndex.Space) |
214-
@as(u8, digit[i]) << @enumToInt(tIndex.Digit) |
215-
@as(u8, lower[i]) << @enumToInt(tIndex.Lower) |
216-
@as(u8, upper[i]) << @enumToInt(tIndex.Upper) |
217-
@as(u8, punct[i]) << @enumToInt(tIndex.Punct) |
218-
@as(u8, graph[i]) << @enumToInt(tIndex.Graph);
219-
}
220-
mem.set(u8, table[128..256], 0);
221-
break :init table;
222-
};
223-
224-
fn inTable(c: u8, t: tIndex) bool {
225-
return (combinedTable[c] & (@as(u8, 1) << @enumToInt(t))) != 0;
226-
}
227-
228-
/// Returns whether the character is alphanumeric.
91+
/// Returns whether the character is alphanumeric: A-Z, a-z, or 0-9.
22992
pub fn isAlphanumeric(c: u8) bool {
230-
return (combinedTable[c] & ((@as(u8, 1) << @enumToInt(tIndex.Alpha)) |
231-
@as(u8, 1) << @enumToInt(tIndex.Digit))) != 0;
93+
return switch (c) {
94+
'A'...'Z', 'a'...'z', '0'...'9' => true,
95+
else => false,
96+
};
23297
}
23398

234-
/// Returns whether the character is alphabetic.
99+
/// Returns whether the character is alphabetic: A-Z or a-z.
235100
pub fn isAlphabetic(c: u8) bool {
236-
return inTable(c, tIndex.Alpha);
101+
return switch (c) {
102+
'A'...'Z', 'a'...'z' => true,
103+
else => false,
104+
};
237105
}
238106

239107
/// Returns whether the character is a control character.
@@ -246,24 +114,33 @@ pub fn isControl(c: u8) bool {
246114

247115
/// Returns whether the character is a digit.
248116
pub fn isDigit(c: u8) bool {
249-
return inTable(c, tIndex.Digit);
117+
return switch (c) {
118+
'0'...'9' => true,
119+
else => false,
120+
};
250121
}
251122

252-
/// Returns whether the character is a lowercased letter.
123+
/// Returns whether the character is a lowercase letter.
253124
pub fn isLower(c: u8) bool {
254-
return inTable(c, tIndex.Lower);
125+
return switch (c) {
126+
'a'...'z' => true,
127+
else => false,
128+
};
255129
}
256130

257131
/// Returns whether the character is printable and has some graphical representation.
258132
/// This also returns `true` for the space character.
259133
/// This is the same as `!isControl(c)`.
260134
pub fn isPrint(c: u8) bool {
261-
return inTable(c, tIndex.Graph) or c == ' ';
135+
return !isControl(c);
262136
}
263137

264138
/// Returns whether this character is included in `whitespace`.
265139
pub fn isWhitespace(c: u8) bool {
266-
return inTable(c, tIndex.Space);
140+
return for (whitespace) |other| {
141+
if (c == other)
142+
break true;
143+
} else false;
267144
}
268145

269146
/// Whitespace for general use.
@@ -281,14 +158,20 @@ test "whitespace" {
281158
}
282159
}
283160

284-
/// Returns whether the character is an uppercased letter.
161+
/// Returns whether the character is an uppercase letter.
285162
pub fn isUpper(c: u8) bool {
286-
return inTable(c, tIndex.Upper);
163+
return switch (c) {
164+
'A'...'Z' => true,
165+
else => false,
166+
};
287167
}
288168

289-
/// Returns whether the character is a hexadecimal digit. Case-insensitive.
169+
/// Returns whether the character is a hexadecimal digit: A-F, a-f, or 0-9.
290170
pub fn isHex(c: u8) bool {
291-
return inTable(c, tIndex.Hex);
171+
return switch (c) {
172+
'A'...'F', 'a'...'f', '0'...'9' => true,
173+
else => false,
174+
};
292175
}
293176

294177
/// Returns whether the character is a 7-bit ASCII character.
@@ -351,6 +234,7 @@ test "ASCII character classes" {
351234

352235
try testing.expect(!isHex('g'));
353236
try testing.expect(isHex('b'));
237+
try testing.expect(isHex('F'));
354238
try testing.expect(isHex('9'));
355239

356240
try testing.expect(!isDigit('~'));

0 commit comments

Comments
 (0)