Skip to content

Commit dc243f6

Browse files
committed
Rework and comptime-generate the lookup table
1 parent 18f8a14 commit dc243f6

File tree

1 file changed

+116
-140
lines changed

1 file changed

+116
-140
lines changed

lib/std/ascii.zig

Lines changed: 116 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -55,184 +55,157 @@ pub const control_code = struct {
5555
pub const XOFF = 0x13;
5656
};
5757

58-
const tIndex = enum(u3) {
59-
Alpha,
60-
Hex,
61-
Space,
62-
Digit,
63-
Lower,
64-
Upper,
65-
// Ctrl, < 0x20 || == DEL
66-
// Print, = Graph || == ' '. NOT '\t' et cetera
67-
Punct,
68-
Graph,
69-
//ASCII, | ~0b01111111
70-
//isBlank, == ' ' || == '\x09'
71-
};
58+
// These naive functions are used to generate the lookup table
59+
// and they also serve as fallbacks when the lookup table isn't available.
60+
//
61+
// Note that some functions, for example `isDigit`, don't use a table because a table lookup would be slower for them.
62+
// Using a table is generally only useful if the `true` values in the table would not all lie in one contiguous range.
63+
64+
fn isCntrlNaive(c: u8) bool {
65+
return c <= control_code.US or c == control_code.DEL;
66+
}
67+
fn isAlphaNaive(c: u8) bool {
68+
return isLower(c) or isUpper(c);
69+
}
70+
fn isXDigitNaive(c: u8) bool {
71+
return isDigit(c) or
72+
(c >= 'a' and c <= 'f') or
73+
(c >= 'A' and c <= 'F');
74+
}
75+
fn isAlNumNaive(c: u8) bool {
76+
return isDigit(c) or isAlphaNaive(c);
77+
}
78+
fn isPunctNaive(c: u8) bool {
79+
@setEvalBranchQuota(3000);
80+
return (c >= '!' and c <= '/') or
81+
(c >= '[' and c <= '`') or
82+
(c >= '{' and c <= '~') or
83+
(c >= ':' and c <= '@');
84+
}
85+
fn isSpaceNaive(c: u8) bool {
86+
@setEvalBranchQuota(5000);
87+
return std.mem.indexOfScalar(u8, &spaces, c) != null;
88+
}
89+
90+
/// A lookup table that stores, for each character, one bit per character class listed in `Index`.
91+
const CombinedTable = struct {
92+
table: [256]u8,
93+
94+
const Index = enum {
95+
control,
96+
alphabetic,
97+
hexadecimal,
98+
alphanumeric,
99+
punct,
100+
spaces,
101+
};
72102

73-
const combinedTable = init: {
74-
comptime var table: [256]u8 = undefined;
103+
/// Generates a table which is filled with the results of the given function for all characters.
104+
fn getBoolTable(comptime condition: fn (u8) bool) [128]bool {
105+
@setEvalBranchQuota(2000);
106+
comptime var table: [128]bool = undefined;
107+
comptime var index = 0;
108+
while (index < 128) : (index += 1) {
109+
table[index] = condition(index);
110+
}
111+
return table;
112+
}
75113

76-
const mem = std.mem;
114+
fn init() CombinedTable {
115+
comptime var table: [256]u8 = undefined;
116+
117+
const control_table = comptime getBoolTable(isCntrlNaive);
118+
const alpha_table = comptime getBoolTable(isAlphaNaive);
119+
const hex_table = comptime getBoolTable(isXDigitNaive);
120+
const alphanumeric_table = comptime getBoolTable(isAlNumNaive);
121+
const punct_table = comptime getBoolTable(isPunctNaive);
122+
const whitespace_table = comptime getBoolTable(isSpaceNaive);
123+
124+
comptime var i = 0;
125+
inline while (i < 128) : (i += 1) {
126+
table[i] =
127+
@boolToInt(control_table[i]) << @enumToInt(Index.control) |
128+
@boolToInt(alpha_table[i]) << @enumToInt(Index.alphabetic) |
129+
@boolToInt(hex_table[i]) << @enumToInt(Index.hexadecimal) |
130+
@boolToInt(alphanumeric_table[i]) << @enumToInt(Index.alphanumeric) |
131+
@boolToInt(punct_table[i]) << @enumToInt(Index.punct) |
132+
@boolToInt(whitespace_table[i]) << @enumToInt(Index.spaces);
133+
}
77134

78-
const alpha = [_]u1{
79-
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
80-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
81-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
82-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
83-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
135+
std.mem.set(u8, table[128..256], 0);
84136

85-
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
86-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
87-
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
88-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
89-
};
90-
const lower = [_]u1{
91-
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
92-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
93-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
94-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
95-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
96-
97-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
98-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
99-
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
101-
};
102-
const upper = [_]u1{
103-
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
104-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
105-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
106-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
107-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
108-
109-
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
111-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
112-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
113-
};
114-
const digit = [_]u1{
115-
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
116-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
117-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
118-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
119-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
120-
121-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
122-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
123-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
124-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
125-
};
126-
const hex = [_]u1{
127-
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
128-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
129-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
130-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
131-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
132-
133-
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
134-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
135-
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
136-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
137-
};
138-
const space = [_]u1{
139-
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
140-
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
141-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
142-
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
143-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
144-
145-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
146-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
147-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
148-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
149-
};
150-
const punct = [_]u1{
151-
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
152-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
153-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
154-
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
155-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
156-
157-
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
158-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
159-
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
160-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
161-
};
162-
const graph = [_]u1{
163-
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
164-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
165-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
166-
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
167-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
168-
169-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
170-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
171-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
172-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
173-
};
137+
return .{ .table = table };
138+
}
174139

175-
comptime var i = 0;
176-
inline while (i < 128) : (i += 1) {
177-
table[i] =
178-
@as(u8, alpha[i]) << @enumToInt(tIndex.Alpha) |
179-
@as(u8, hex[i]) << @enumToInt(tIndex.Hex) |
180-
@as(u8, space[i]) << @enumToInt(tIndex.Space) |
181-
@as(u8, digit[i]) << @enumToInt(tIndex.Digit) |
182-
@as(u8, lower[i]) << @enumToInt(tIndex.Lower) |
183-
@as(u8, upper[i]) << @enumToInt(tIndex.Upper) |
184-
@as(u8, punct[i]) << @enumToInt(tIndex.Punct) |
185-
@as(u8, graph[i]) << @enumToInt(tIndex.Graph);
140+
fn contains(self: CombinedTable, c: u8, index: Index) bool {
141+
return (self.table[c] & (@as(u8, 1) << @enumToInt(index))) != 0;
186142
}
187-
mem.set(u8, table[128..256], 0);
188-
break :init table;
189143
};
190144

191-
fn inTable(c: u8, t: tIndex) bool {
192-
return (combinedTable[c] & (@as(u8, 1) << @enumToInt(t))) != 0;
193-
}
145+
/// The combined table for fast lookup.
146+
///
147+
/// This is not used in `ReleaseSmall` to save 256 bytes at the cost of
148+
/// a small decrease in performance.
149+
const combined_table: ?CombinedTable = if (@import("builtin").mode == .ReleaseSmall)
150+
null
151+
else
152+
CombinedTable.init();
194153

195154
/// Returns whether the character is alphanumeric. This is case-insensitive.
196155
pub fn isAlNum(c: u8) bool {
197-
return (combinedTable[c] & ((@as(u8, 1) << @enumToInt(tIndex.Alpha)) |
198-
@as(u8, 1) << @enumToInt(tIndex.Digit))) != 0;
156+
if (combined_table) |table|
157+
return table.contains(c, .alphanumeric)
158+
else
159+
return isAlNumNaive(c);
199160
}
200161

201162
/// Returns whether the character is alphabetic. This is case-insensitive.
202163
pub fn isAlpha(c: u8) bool {
203-
return inTable(c, tIndex.Alpha);
164+
if (combined_table) |table|
165+
return table.contains(c, .alphabetic)
166+
else
167+
return isAlphaNaive(c);
204168
}
205169

206170
/// Returns whether the character is a control character.
207171
///
208172
/// See also: `control_code`
209173
pub fn isCntrl(c: u8) bool {
210-
return c < 0x20 or c == 127; //DEL
174+
if (combined_table) |table|
175+
return table.contains(c, .control)
176+
else
177+
return isCntrlNaive(c);
211178
}
212179

213180
pub fn isDigit(c: u8) bool {
214-
return inTable(c, tIndex.Digit);
181+
return c >= '0' and c <= '9';
215182
}
216183

217184
pub fn isGraph(c: u8) bool {
218-
return inTable(c, tIndex.Graph);
185+
return isPrint(c) and c != ' ';
219186
}
220187

221188
pub fn isLower(c: u8) bool {
222-
return inTable(c, tIndex.Lower);
189+
return c >= 'a' and c <= 'z';
223190
}
224191

225192
/// Returns whether the character has some graphical representation and can be printed.
226193
pub fn isPrint(c: u8) bool {
227-
return inTable(c, tIndex.Graph) or c == ' ';
194+
return c >= ' ' and c <= '~';
228195
}
229196

230197
pub fn isPunct(c: u8) bool {
231-
return inTable(c, tIndex.Punct);
198+
if (combined_table) |table|
199+
return table.contains(c, .punct)
200+
else
201+
return isPunctNaive(c);
232202
}
233203

234204
pub fn isSpace(c: u8) bool {
235-
return inTable(c, tIndex.Space);
205+
if (combined_table) |table|
206+
return table.contains(c, .spaces)
207+
else
208+
return isSpaceNaive(c);
236209
}
237210

238211
/// All the values for which `isSpace()` returns `true`.
@@ -250,12 +223,15 @@ test "spaces" {
250223
}
251224

252225
pub fn isUpper(c: u8) bool {
253-
return inTable(c, tIndex.Upper);
226+
return c >= 'A' and c <= 'Z';
254227
}
255228

256229
/// Returns whether the character is a hexadecimal digit. This is case-insensitive.
257230
pub fn isXDigit(c: u8) bool {
258-
return inTable(c, tIndex.Hex);
231+
if (combined_table) |table|
232+
return table.contains(c, .hexadecimal)
233+
else
234+
return isXDigitNaive(c);
259235
}
260236

261237
pub fn isASCII(c: u8) bool {

0 commit comments

Comments
 (0)