Skip to content

Commit ba01415

Browse files
committed
Add back table usage for some functions
1 parent 2fccf6b commit ba01415

File tree

1 file changed

+55
-37
lines changed

1 file changed

+55
-37
lines changed

lib/std/ascii.zig

Lines changed: 55 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -59,51 +59,59 @@ pub const control_code = struct {
5959
// These naive functions are used to generate the lookup table
6060
// and they're used as fallbacks when the lookup table isn't available.
6161
//
62-
// Note that some functions like for example `isDigit` don't use a table because it's slower.
63-
// Using a table is generally only useful if not all `true` values in the table would be in one row.
62+
// Note that even some very simple functions like `isDigit` use a table because it avoids
63+
// branching, which is slow.
6464

65-
fn isCntrlNaive(c: u8) bool {
66-
return c <= control_code.US or c == control_code.DEL;
67-
}
6865
fn isAlphaNaive(c: u8) bool {
69-
return isLower(c) or isUpper(c);
66+
return isLowerNaive(c) or isUpperNaive(c);
7067
}
7168
fn isXDigitNaive(c: u8) bool {
72-
return isDigit(c) or
69+
return isDigitNaive(c) or
7370
(c >= 'a' and c <= 'f') or
7471
(c >= 'A' and c <= 'F');
7572
}
76-
fn isAlNumNaive(c: u8) bool {
77-
return isDigit(c) or isAlphaNaive(c);
73+
fn isSpaceNaive(c: u8) bool {
74+
return std.mem.indexOfScalar(u8, &spaces, c) != null;
75+
}
76+
fn isDigitNaive(c: u8) bool {
77+
return c >= '0' and c <= '9';
78+
}
79+
fn isLowerNaive(c: u8) bool {
80+
return c >= 'a' and c <= 'z';
81+
}
82+
fn isUpperNaive(c: u8) bool {
83+
return c >= 'A' and c <= 'Z';
7884
}
7985
fn isPunctNaive(c: u8) bool {
80-
@setEvalBranchQuota(3000);
8186
return (c >= '!' and c <= '/') or
8287
(c >= '[' and c <= '`') or
8388
(c >= '{' and c <= '~') or
8489
(c >= ':' and c <= '@');
8590
}
86-
fn isSpaceNaive(c: u8) bool {
87-
@setEvalBranchQuota(5000);
88-
return std.mem.indexOfScalar(u8, &spaces, c) != null;
91+
fn isAlNumNaive(c: u8) bool {
92+
return isDigitNaive(c) or isAlphaNaive(c);
8993
}
9094

9195
/// A lookup table that stores, for each byte value, one flag bit per `Index` variant.
9296
const CombinedTable = struct {
9397
table: [256]u8,
9498

95-
const Index = enum {
96-
control,
99+
// Each variant is a bit position within a `u8` table entry, so we cannot have more than
100+
// 8 variants here. We should therefore only use a table for the most important/common functions.
101+
const Index = enum(u3) {
97102
alphabetic,
98103
hexadecimal,
99-
alphanumeric,
104+
space,
105+
digit,
106+
lower,
107+
upper,
100108
punct,
101-
spaces,
109+
alnum,
102110
};
103111

104112
/// Generates a table which is filled with the results of the given function for all 128 ASCII characters.
105113
fn getBoolTable(comptime condition: fn (u8) bool) [128]bool {
106-
@setEvalBranchQuota(2000);
114+
@setEvalBranchQuota(7500);
107115
comptime var table: [128]bool = undefined;
108116
comptime var index = 0;
109117
while (index < 128) : (index += 1) {
@@ -115,22 +123,26 @@ const CombinedTable = struct {
115123
fn init() CombinedTable {
116124
comptime var table: [256]u8 = undefined;
117125

118-
const control_table = comptime getBoolTable(isCntrlNaive);
119-
const alpha_table = comptime getBoolTable(isAlphaNaive);
120-
const hex_table = comptime getBoolTable(isXDigitNaive);
121-
const alphanumeric_table = comptime getBoolTable(isAlNumNaive);
126+
const alphabetic_table = comptime getBoolTable(isAlphaNaive);
127+
const hexadecimal_table = comptime getBoolTable(isXDigitNaive);
128+
const space_table = comptime getBoolTable(isSpaceNaive);
129+
const digit_table = comptime getBoolTable(isDigitNaive);
130+
const lower_table = comptime getBoolTable(isLowerNaive);
131+
const upper_table = comptime getBoolTable(isUpperNaive);
122132
const punct_table = comptime getBoolTable(isPunctNaive);
123-
const whitespace_table = comptime getBoolTable(isSpaceNaive);
133+
const alnum_table = comptime getBoolTable(isAlNumNaive);
124134

125135
comptime var i = 0;
126136
inline while (i < 128) : (i += 1) {
127137
table[i] =
128-
@boolToInt(control_table[i]) << @enumToInt(Index.control) |
129-
@boolToInt(alpha_table[i]) << @enumToInt(Index.alphabetic) |
130-
@boolToInt(hex_table[i]) << @enumToInt(Index.hexadecimal) |
131-
@boolToInt(alphanumeric_table[i]) << @enumToInt(Index.alphanumeric) |
138+
@boolToInt(alphabetic_table[i]) << @enumToInt(Index.alphabetic) |
139+
@boolToInt(hexadecimal_table[i]) << @enumToInt(Index.hexadecimal) |
140+
@boolToInt(space_table[i]) << @enumToInt(Index.space) |
141+
@boolToInt(digit_table[i]) << @enumToInt(Index.digit) |
142+
@boolToInt(lower_table[i]) << @enumToInt(Index.lower) |
143+
@boolToInt(upper_table[i]) << @enumToInt(Index.upper) |
132144
@boolToInt(punct_table[i]) << @enumToInt(Index.punct) |
133-
@boolToInt(whitespace_table[i]) << @enumToInt(Index.spaces);
145+
@boolToInt(alnum_table[i]) << @enumToInt(Index.alnum);
134146
}
135147

136148
std.mem.set(u8, table[128..256], 0);
@@ -155,7 +167,7 @@ else
155167
/// Returns whether the character is alphanumeric. This is case-insensitive.
156168
pub fn isAlNum(c: u8) bool {
157169
if (combined_table) |table|
158-
return table.contains(c, .alphanumeric)
170+
return table.contains(c, .alnum)
159171
else
160172
return isAlNumNaive(c);
161173
}
@@ -172,22 +184,25 @@ pub fn isAlpha(c: u8) bool {
172184
///
173185
/// See also: `control`
174186
pub fn isCntrl(c: u8) bool {
175-
if (combined_table) |table|
176-
return table.contains(c, .control)
177-
else
178-
return isCntrlNaive(c);
187+
return c <= control_code.US or c == control_code.DEL;
179188
}
180189

181190
pub fn isDigit(c: u8) bool {
182-
return c >= '0' and c <= '9';
191+
if (combined_table) |table|
192+
return table.contains(c, .digit)
193+
else
194+
return isDigitNaive(c);
183195
}
184196

185197
pub fn isGraph(c: u8) bool {
186198
return isPrint(c) and c != ' ';
187199
}
188200

189201
pub fn isLower(c: u8) bool {
190-
return c >= 'a' and c <= 'z';
202+
if (combined_table) |table|
203+
return table.contains(c, .lower)
204+
else
205+
return isLowerNaive(c);
191206
}
192207

193208
/// Returns whether the character has some graphical representation and can be printed.
@@ -204,7 +219,7 @@ pub fn isPunct(c: u8) bool {
204219

205220
pub fn isSpace(c: u8) bool {
206221
if (combined_table) |table|
207-
return table.contains(c, .spaces)
222+
return table.contains(c, .space)
208223
else
209224
return isSpaceNaive(c);
210225
}
@@ -223,7 +238,10 @@ test "spaces" {
223238
}
224239

225240
pub fn isUpper(c: u8) bool {
226-
return c >= 'A' and c <= 'Z';
241+
if (combined_table) |table|
242+
return table.contains(c, .upper)
243+
else
244+
return isUpperNaive(c);
227245
}
228246

229247
/// Returns whether the character is a hexadecimal digit. This is case-insensitive.

0 commit comments

Comments
 (0)