Skip to content

Commit b21a6c2

Browse files
authored
Add back table usage for some functions
1 parent 2fccf6b commit b21a6c2

File tree

1 file changed

+46
-34
lines changed

1 file changed

+46
-34
lines changed

lib/std/ascii.zig

Lines changed: 46 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -59,51 +59,59 @@ pub const control_code = struct {
5959
// These naive functions are used to generate the lookup table
6060
// and they're used as fallbacks when the lookup table isn't available.
6161
//
62-
// Note that some functions like for example `isDigit` don't use a table because it's slower.
63-
// Using a table is generally only useful if not all `true` values in the table would be in one row.
62+
// Note that even some very simple functions like `isDigit` use a table because it avoids
63+
// branching which is slow.
6464

65-
fn isCntrlNaive(c: u8) bool {
66-
return c <= control_code.US or c == control_code.DEL;
67-
}
6865
fn isAlphaNaive(c: u8) bool {
6966
return isLower(c) or isUpper(c);
7067
}
7168
fn isXDigitNaive(c: u8) bool {
72-
return isDigit(c) or
69+
return isDigitNaive(c) or
7370
(c >= 'a' and c <= 'f') or
7471
(c >= 'A' and c <= 'F');
7572
}
76-
fn isAlNumNaive(c: u8) bool {
77-
return isDigit(c) or isAlphaNaive(c);
73+
fn isSpaceNaive(c: u8) bool {
74+
return std.mem.indexOfScalar(u8, &spaces, c) != null;
75+
}
76+
fn isDigitNaive(c: u8) bool {
77+
return c >= '0' and c <= '9';
78+
}
79+
fn isLowerNaive(c: u8) bool {
80+
return c >= 'a' and c <= 'z';
81+
}
82+
fn isUpperNaive(c: u8) bool {
83+
return c >= 'A' and c <= 'Z';
7884
}
7985
fn isPunctNaive(c: u8) bool {
80-
@setEvalBranchQuota(3000);
8186
return (c >= '!' and c <= '/') or
8287
(c >= '[' and c <= '`') or
8388
(c >= '{' and c <= '~') or
8489
(c >= ':' and c <= '@');
8590
}
86-
fn isSpaceNaive(c: u8) bool {
87-
@setEvalBranchQuota(5000);
88-
return std.mem.indexOfScalar(u8, &spaces, c) != null;
91+
fn isAlNumNaive(c: u8) bool {
92+
return isDigitNaive(c) or isAlphaNaive(c);
8993
}
9094

9195
/// A lookup table.
9296
const CombinedTable = struct {
9397
table: [256]u8,
9498

95-
const Index = enum {
96-
control,
99+
// We cannot have more than 8 variants here (each one is a bit in a `u8` table entry), which means we should choose
100+
// only the most important/common functions to use a table for.
101+
const Index = enum(u3) {
97102
alphabetic,
98103
hexadecimal,
99-
alphanumeric,
104+
space,
105+
digit,
106+
lower,
107+
upper,
100108
punct,
101-
spaces,
109+
alnum,
102110
};
103111

104112
/// Generates a table which is filled with the results of the given function for all 128 ASCII characters.
105113
fn getBoolTable(comptime condition: fn (u8) bool) [128]bool {
106-
@setEvalBranchQuota(2000);
114+
@setEvalBranchQuota(7500);
107115
comptime var table: [128]bool = undefined;
108116
comptime var index = 0;
109117
while (index < 128) : (index += 1) {
@@ -115,22 +123,26 @@ const CombinedTable = struct {
115123
fn init() CombinedTable {
116124
comptime var table: [256]u8 = undefined;
117125

118-
const control_table = comptime getBoolTable(isCntrlNaive);
119-
const alpha_table = comptime getBoolTable(isAlphaNaive);
120-
const hex_table = comptime getBoolTable(isXDigitNaive);
121-
const alphanumeric_table = comptime getBoolTable(isAlNumNaive);
126+
const alphabetic_table = comptime getBoolTable(isAlphaNaive);
127+
const hexadecimal_table = comptime getBoolTable(isXDigitNaive);
128+
const space_table = comptime getBoolTable(isSpaceNaive);
129+
const digit_table = comptime getBoolTable(isDigitNaive);
130+
const lower_table = comptime getBoolTable(isLowerNaive);
131+
const upper_table = comptime getBoolTable(isUpperNaive);
122132
const punct_table = comptime getBoolTable(isPunctNaive);
123-
const whitespace_table = comptime getBoolTable(isSpaceNaive);
133+
const alnum_table = comptime getBoolTable(isAlNumNaive);
124134

125135
comptime var i = 0;
126136
inline while (i < 128) : (i += 1) {
127137
table[i] =
128-
@boolToInt(control_table[i]) << @enumToInt(Index.control) |
129-
@boolToInt(alpha_table[i]) << @enumToInt(Index.alphabetic) |
130-
@boolToInt(hex_table[i]) << @enumToInt(Index.hexadecimal) |
131-
@boolToInt(alphanumeric_table[i]) << @enumToInt(Index.alphanumeric) |
138+
@boolToInt(alphabetic_table[i]) << @enumToInt(Index.alphabetic) |
139+
@boolToInt(hexadecimal_table[i]) << @enumToInt(Index.hexadecimal) |
140+
@boolToInt(space_table[i]) << @enumToInt(Index.space) |
141+
@boolToInt(digit_table[i]) << @enumToInt(Index.digit) |
142+
@boolToInt(lower_table[i]) << @enumToInt(Index.lower) |
143+
@boolToInt(upper_table[i]) << @enumToInt(Index.upper) |
132144
@boolToInt(punct_table[i]) << @enumToInt(Index.punct) |
133-
@boolToInt(whitespace_table[i]) << @enumToInt(Index.spaces);
145+
@boolToInt(alnum_table[i]) << @enumToInt(Index.alnum);
134146
}
135147

136148
std.mem.set(u8, table[128..256], 0);
@@ -155,7 +167,7 @@ else
155167
/// Returns whether the character is alphanumeric. This is case-insensitive.
156168
pub fn isAlNum(c: u8) bool {
157169
if (combined_table) |table|
158-
return table.contains(c, .alphanumeric)
170+
return table.contains(c, .alnum)
159171
else
160172
return isAlNumNaive(c);
161173
}
@@ -172,14 +184,14 @@ pub fn isAlpha(c: u8) bool {
172184
///
173185
/// See also: `control`
174186
pub fn isCntrl(c: u8) bool {
175-
if (combined_table) |table|
176-
return table.contains(c, .control)
177-
else
178-
return isCntrlNaive(c);
187+
return c <= control_code.US or c == control_code.DEL;
179188
}
180189

181190
pub fn isDigit(c: u8) bool {
182-
return c >= '0' and c <= '9';
191+
if (combined_table) |table|
192+
return table.contains(c, .digit)
193+
else
194+
return isDigitNaive(c);
183195
}
184196

185197
pub fn isGraph(c: u8) bool {
@@ -204,7 +216,7 @@ pub fn isPunct(c: u8) bool {
204216

205217
pub fn isSpace(c: u8) bool {
206218
if (combined_table) |table|
207-
return table.contains(c, .spaces)
219+
return table.contains(c, .space)
208220
else
209221
return isSpaceNaive(c);
210222
}

0 commit comments

Comments
 (0)