1
- // Does NOT look at the locale the way C89's toupper(3), isspace() et cetera does.
2
- // I could have taken only a u7 to make this clear, but it would be slower
3
- // It is my opinion that encodings other than UTF-8 should not be supported.
4
- //
5
- // (and 128 bytes is not much to pay).
6
- // Also does not handle Unicode character classes.
7
- //
8
- // https://upload.wikimedia.org/wikipedia/commons/thumb/c/cf/USASCII_code_chart.png/1200px-USASCII_code_chart.png
1
+ //! The 7-bit [ASCII](https://en.wikipedia.org/wiki/ASCII) character encoding standard.
2
+ //!
3
+ //! This is not to be confused with the 8-bit [extended ASCII](https://en.wikipedia.org/wiki/Extended_ASCII) character encoding.
4
+ //!
5
+ //! Even though this module concerns itself with 7-bit ASCII,
6
+ //! functions use `u8` as the type instead of `u7` for convenience and compatibility.
7
+ //! Characters outside of the 7-bit range are gracefully handled (e.g. by returning `false`).
8
+ //!
9
+ //! See also: https://en.wikipedia.org/wiki/ASCII#Character_set
9
10
10
11
const std = @import ("std" );
11
12
12
- /// Contains constants for the C0 control codes of the ASCII encoding.
13
- /// https://en.wikipedia.org/wiki/C0_and_C1_control_codes
13
+ // TODO: remove all decls marked as DEPRECATED after 0.10.0's release
14
+
15
+ /// The C0 control codes of the ASCII encoding.
16
+ ///
17
+ /// See also: https://en.wikipedia.org/wiki/C0_and_C1_control_codes and `isControl`.
14
18
pub const control_code = struct {
19
+ // DEPRECATED: use the lowercase variant
15
20
pub const NUL = 0x00 ;
21
+ // DEPRECATED: use the lowercase variant
16
22
pub const SOH = 0x01 ;
23
+ // DEPRECATED: use the lowercase variant
17
24
pub const STX = 0x02 ;
25
+ // DEPRECATED: use the lowercase variant
18
26
pub const ETX = 0x03 ;
27
+ // DEPRECATED: use the lowercase variant
19
28
pub const EOT = 0x04 ;
29
+ // DEPRECATED: use the lowercase variant
20
30
pub const ENQ = 0x05 ;
31
+ // DEPRECATED: use the lowercase variant
21
32
pub const ACK = 0x06 ;
33
+ // DEPRECATED: use the lowercase variant
22
34
pub const BEL = 0x07 ;
35
+ // DEPRECATED: use the lowercase variant
23
36
pub const BS = 0x08 ;
37
+ // DEPRECATED: use `ht`
24
38
pub const TAB = 0x09 ;
39
+ // DEPRECATED: use the lowercase variant
25
40
pub const LF = 0x0A ;
41
+ // DEPRECATED: use the lowercase variant
26
42
pub const VT = 0x0B ;
43
+ // DEPRECATED: use the lowercase variant
27
44
pub const FF = 0x0C ;
45
+ // DEPRECATED: use the lowercase variant
28
46
pub const CR = 0x0D ;
47
+ // DEPRECATED: use the lowercase variant
29
48
pub const SO = 0x0E ;
49
+ // DEPRECATED: use the lowercase variant
30
50
pub const SI = 0x0F ;
51
+ // DEPRECATED: use the lowercase variant
31
52
pub const DLE = 0x10 ;
53
+ // DEPRECATED: use the lowercase variant
32
54
pub const DC1 = 0x11 ;
55
+ // DEPRECATED: use the lowercase variant
33
56
pub const DC2 = 0x12 ;
57
+ // DEPRECATED: use the lowercase variant
34
58
pub const DC3 = 0x13 ;
59
+ // DEPRECATED: use the lowercase variant
35
60
pub const DC4 = 0x14 ;
61
+ // DEPRECATED: use the lowercase variant
36
62
pub const NAK = 0x15 ;
63
+ // DEPRECATED: use the lowercase variant
37
64
pub const SYN = 0x16 ;
65
+ // DEPRECATED: use the lowercase variant
38
66
pub const ETB = 0x17 ;
67
+ // DEPRECATED: use the lowercase variant
39
68
pub const CAN = 0x18 ;
69
+ // DEPRECATED: use the lowercase variant
40
70
pub const EM = 0x19 ;
71
+ // DEPRECATED: use the lowercase variant
41
72
pub const SUB = 0x1A ;
73
+ // DEPRECATED: use the lowercase variant
42
74
pub const ESC = 0x1B ;
75
+ // DEPRECATED: use the lowercase variant
43
76
pub const FS = 0x1C ;
77
+ // DEPRECATED: use the lowercase variant
44
78
pub const GS = 0x1D ;
79
+ // DEPRECATED: use the lowercase variant
45
80
pub const RS = 0x1E ;
81
+ // DEPRECATED: use the lowercase variant
46
82
pub const US = 0x1F ;
47
-
83
+ // DEPRECATED: use the lowercase variant
48
84
pub const DEL = 0x7F ;
49
-
85
+ // DEPRECATED: use the lowercase variant
50
86
pub const XON = 0x11 ;
87
+ // DEPRECATED: use the lowercase variant
51
88
pub const XOFF = 0x13 ;
89
+
90
+ /// Null.
91
+ pub const nul = 0x00 ;
92
+ /// Start of Heading.
93
+ pub const soh = 0x01 ;
94
+ /// Start of Text.
95
+ pub const stx = 0x02 ;
96
+ /// End of Text.
97
+ pub const etx = 0x03 ;
98
+ /// End of Transmission.
99
+ pub const eot = 0x04 ;
100
+ /// Enquiry.
101
+ pub const enq = 0x05 ;
102
+ /// Acknowledge.
103
+ pub const ack = 0x06 ;
104
+ /// Bell, Alert.
105
+ pub const bel = 0x07 ;
106
+ /// Backspace.
107
+ pub const bs = 0x08 ;
108
+ /// Horizontal Tab, Tab ('\t').
109
+ pub const ht = 0x09 ;
110
+ /// Line Feed, Newline ('\n').
111
+ pub const lf = 0x0A ;
112
+ /// Vertical Tab.
113
+ pub const vt = 0x0B ;
114
+ /// Form Feed.
115
+ pub const ff = 0x0C ;
116
+ /// Carriage Return ('\r').
117
+ pub const cr = 0x0D ;
118
+ /// Shift Out.
119
+ pub const so = 0x0E ;
120
+ /// Shift In.
121
+ pub const si = 0x0F ;
122
+ /// Data Link Escape.
123
+ pub const dle = 0x10 ;
124
+ /// Device Control One (XON).
125
+ pub const dc1 = 0x11 ;
126
+ /// Device Control Two.
127
+ pub const dc2 = 0x12 ;
128
+ /// Device Control Three (XOFF).
129
+ pub const dc3 = 0x13 ;
130
+ /// Device Control Four.
131
+ pub const dc4 = 0x14 ;
132
+ /// Negative Acknowledge.
133
+ pub const nak = 0x15 ;
134
+ /// Synchronous Idle.
135
+ pub const syn = 0x16 ;
136
+ /// End of Transmission Block.
137
+ pub const etb = 0x17 ;
138
+ /// Cancel.
139
+ pub const can = 0x18 ;
140
+ /// End of Medium.
141
+ pub const em = 0x19 ;
142
+ /// Substitute.
143
+ pub const sub = 0x1A ;
144
+ /// Escape.
145
+ pub const esc = 0x1B ;
146
+ /// File Separator.
147
+ pub const fs = 0x1C ;
148
+ /// Group Separator.
149
+ pub const gs = 0x1D ;
150
+ /// Record Separator.
151
+ pub const rs = 0x1E ;
152
+ /// Unit Separator.
153
+ pub const us = 0x1F ;
154
+
155
+ /// Delete.
156
+ pub const del = 0x7F ;
157
+
158
+ /// An alias to `dc1`.
159
+ pub const xon = dc1 ;
160
+ /// An alias to `dc3`.
161
+ pub const xoff = dc3 ;
52
162
};
53
163
54
164
const tIndex = enum (u3 ) {
@@ -188,73 +298,106 @@ fn inTable(c: u8, t: tIndex) bool {
188
298
return (combinedTable [c ] & (@as (u8 , 1 ) << @enumToInt (t ))) != 0 ;
189
299
}
190
300
191
- pub fn isAlNum (c : u8 ) bool {
301
+ /// DEPRECATED: use `isAlphanumeric`
302
+ pub const isAlNum = isAlphanumeric ;
303
+ /// DEPRECATED: use `isAlphabetic`
304
+ pub const isAlpha = isAlphabetic ;
305
+ /// DEPRECATED: use `isControl`
306
+ pub const isCntrl = isControl ;
307
+ /// DEPRECATED: use `isWhitespace`.
308
+ pub const isSpace = isWhitespace ;
309
+ /// DEPRECATED: use `whitespace`.
310
+ pub const spaces = whitespace ;
311
+ /// DEPRECATED: use `isHex`.
312
+ pub const isXDigit = isHex ;
313
+
314
+ /// Returns whether the character is alphanumeric.
315
+ pub fn isAlphanumeric (c : u8 ) bool {
192
316
return (combinedTable [c ] & ((@as (u8 , 1 ) << @enumToInt (tIndex .Alpha )) |
193
317
@as (u8 , 1 ) << @enumToInt (tIndex .Digit ))) != 0 ;
194
318
}
195
319
196
- pub fn isAlpha (c : u8 ) bool {
320
+ /// Returns whether the character is alphabetic.
321
+ pub fn isAlphabetic (c : u8 ) bool {
197
322
return inTable (c , tIndex .Alpha );
198
323
}
199
324
200
- pub fn isCntrl (c : u8 ) bool {
201
- return c < 0x20 or c == 127 ; //DEL
325
+ /// Returns whether the character is a control character.
326
+ /// For 7-bit ASCII characters, this is the same as `!isPrint(c)`.
327
+ ///
328
+ /// See also: `control_code`.
329
+ pub fn isControl (c : u8 ) bool {
330
+ return c <= control_code .us or c == control_code .del ;
202
331
}
203
332
333
+ /// Returns whether the character is a digit.
204
334
pub fn isDigit (c : u8 ) bool {
205
335
return inTable (c , tIndex .Digit );
206
336
}
207
337
338
+ /// DEPRECATED: use `isPrint(c) and c != ' '` instead
208
339
pub fn isGraph (c : u8 ) bool {
209
340
return inTable (c , tIndex .Graph );
210
341
}
211
342
343
+ /// Returns whether the character is a lowercased letter.
212
344
pub fn isLower (c : u8 ) bool {
213
345
return inTable (c , tIndex .Lower );
214
346
}
215
347
348
+ /// Returns whether the character has some graphical representation and can be printed.
349
+ /// This also returns `true` for the space character.
350
+ /// For 7-bit ASCII characters, this is the same as `!isControl(c)`.
216
351
pub fn isPrint (c : u8 ) bool {
217
352
return inTable (c , tIndex .Graph ) or c == ' ' ;
218
353
}
219
354
355
+ /// DEPRECATED: create your own function based on your needs and what you want to do.
220
356
pub fn isPunct (c : u8 ) bool {
221
357
return inTable (c , tIndex .Punct );
222
358
}
223
359
224
- pub fn isSpace (c : u8 ) bool {
360
+ /// Returns whether this character is included in `whitespace`.
361
+ pub fn isWhitespace (c : u8 ) bool {
225
362
return inTable (c , tIndex .Space );
226
363
}
227
364
228
- /// All the values for which isSpace() returns true. This may be used with
229
- /// e.g. std.mem.trim() to trim whiteSpace.
230
- pub const spaces = [_ ]u8 { ' ' , '\t ' , '\n ' , '\r ' , control_code .VT , control_code .FF };
365
+ /// Whitespace for general use.
366
+ /// This may be used with e.g. `std.mem.trim` to trim whitespace.
367
+ ///
368
+ /// See also: `isWhitespace`.
369
+ pub const whitespace = [_ ]u8 { ' ' , '\t ' , '\n ' , '\r ' , control_code .vt , control_code .ff };
231
370
232
- test "spaces" {
233
- const testing = std .testing ;
234
- for (spaces ) | space | try testing .expect (isSpace (space ));
371
+ test "whitespace" {
372
+ for (whitespace ) | char | try std .testing .expect (isWhitespace (char ));
235
373
236
374
var i : u8 = 0 ;
237
375
while (isASCII (i )) : (i += 1 ) {
238
- if (isSpace (i )) try testing .expect (std .mem .indexOfScalar (u8 , & spaces , i ) != null );
376
+ if (isWhitespace (i )) try std . testing .expect (std .mem .indexOfScalar (u8 , & whitespace , i ) != null );
239
377
}
240
378
}
241
379
380
+ /// Returns whether the character is an uppercased letter.
242
381
pub fn isUpper (c : u8 ) bool {
243
382
return inTable (c , tIndex .Upper );
244
383
}
245
384
246
- pub fn isXDigit (c : u8 ) bool {
385
+ /// Returns whether the character is a hexadecimal digit. This is case-insensitive.
386
+ pub fn isHex (c : u8 ) bool {
247
387
return inTable (c , tIndex .Hex );
248
388
}
249
389
390
+ /// Returns whether the character is a 7-bit ASCII character.
250
391
pub fn isASCII (c : u8 ) bool {
251
392
return c < 128 ;
252
393
}
253
394
395
+ /// DEPRECATED: use `c == ' ' or c == '\t'` or try `isWhitespace`
254
396
pub fn isBlank (c : u8 ) bool {
255
397
return (c == ' ' ) or (c == '\x09 ' );
256
398
}
257
399
400
+ /// Uppercases the character and returns it as-is if it's already uppercased or not a letter.
258
401
pub fn toUpper (c : u8 ) u8 {
259
402
if (isLower (c )) {
260
403
return c & 0b11011111 ;
@@ -263,6 +406,7 @@ pub fn toUpper(c: u8) u8 {
263
406
}
264
407
}
265
408
409
+ /// Lowercases the character and returns it as-is if it's already lowercased or not a letter.
266
410
pub fn toLower (c : u8 ) u8 {
267
411
if (isUpper (c )) {
268
412
return c | 0b00100000 ;
@@ -274,13 +418,50 @@ pub fn toLower(c: u8) u8 {
274
418
test "ascii character classes" {
275
419
const testing = std .testing ;
276
420
421
+ try testing .expect (! isControl ('a' ));
422
+ try testing .expect (! isControl ('z' ));
423
+ try testing .expect (isControl (control_code .nul ));
424
+ try testing .expect (isControl (control_code .ff ));
425
+ try testing .expect (isControl (control_code .us ));
426
+
277
427
try testing .expect ('C' == toUpper ('c' ));
278
428
try testing .expect (':' == toUpper (':' ));
279
429
try testing .expect ('\xab ' == toUpper ('\xab ' ));
430
+ try testing .expect (! isUpper ('z' ));
431
+
280
432
try testing .expect ('c' == toLower ('C' ));
433
+ try testing .expect (':' == toLower (':' ));
434
+ try testing .expect ('\xab ' == toLower ('\xab ' ));
435
+ try testing .expect (! isLower ('Z' ));
436
+
437
+ try testing .expect (isAlphanumeric ('Z' ));
438
+ try testing .expect (isAlphanumeric ('z' ));
439
+ try testing .expect (isAlphanumeric ('5' ));
440
+ try testing .expect (isAlphanumeric ('5' ));
441
+ try testing .expect (! isAlphanumeric ('!' ));
442
+
443
+ try testing .expect (! isAlpha ('5' ));
281
444
try testing .expect (isAlpha ('c' ));
282
445
try testing .expect (! isAlpha ('5' ));
283
- try testing .expect (isSpace (' ' ));
446
+
447
+ try testing .expect (isWhitespace (' ' ));
448
+ try testing .expect (isWhitespace ('\t ' ));
449
+ try testing .expect (isWhitespace ('\r ' ));
450
+ try testing .expect (isWhitespace ('\n ' ));
451
+ try testing .expect (! isWhitespace ('.' ));
452
+
453
+ try testing .expect (! isHex ('g' ));
454
+ try testing .expect (isHex ('b' ));
455
+ try testing .expect (isHex ('9' ));
456
+
457
+ try testing .expect (! isDigit ('~' ));
458
+ try testing .expect (isDigit ('0' ));
459
+ try testing .expect (isDigit ('9' ));
460
+
461
+ try testing .expect (isPrint (' ' ));
462
+ try testing .expect (isPrint ('@' ));
463
+ try testing .expect (isPrint ('~' ));
464
+ try testing .expect (! isPrint (control_code .esc ));
284
465
}
285
466
286
467
/// Writes a lower case copy of `ascii_string` to `output`.
@@ -341,7 +522,7 @@ test "allocUpperString" {
341
522
try std .testing .expectEqualStrings ("ABCDEFGHIJKLMNOPQRST0234+💩!" , result );
342
523
}
343
524
344
- /// Compares strings `a` and `b` case insensitively and returns whether they are equal.
525
+ /// Compares strings `a` and `b` case-insensitively and returns whether they are equal.
345
526
pub fn eqlIgnoreCase (a : []const u8 , b : []const u8 ) bool {
346
527
if (a .len != b .len ) return false ;
347
528
for (a ) | a_c , i | {
@@ -397,11 +578,10 @@ test "indexOfIgnoreCase" {
397
578
try std .testing .expect (indexOfIgnoreCase ("one two three FouR" , "gOur" ) == null );
398
579
try std .testing .expect (indexOfIgnoreCase ("foO" , "Foo" ).? == 0 );
399
580
try std .testing .expect (indexOfIgnoreCase ("foo" , "fool" ) == null );
400
-
401
581
try std .testing .expect (indexOfIgnoreCase ("FOO foo" , "fOo" ).? == 0 );
402
582
}
403
583
404
- /// Compares two slices of numbers lexicographically . O(n).
584
+ /// Returns the lexicographical order of two slices. O(n).
405
585
pub fn orderIgnoreCase (lhs : []const u8 , rhs : []const u8 ) std.math.Order {
406
586
const n = std .math .min (lhs .len , rhs .len );
407
587
var i : usize = 0 ;
@@ -415,8 +595,7 @@ pub fn orderIgnoreCase(lhs: []const u8, rhs: []const u8) std.math.Order {
415
595
return std .math .order (lhs .len , rhs .len );
416
596
}
417
597
418
- /// Returns true if lhs < rhs, false otherwise
419
- /// TODO rename "IgnoreCase" to "Insensitive" in this entire file.
598
+ /// Returns whether the lexicographical order of `lhs` is lower than `rhs`.
420
599
pub fn lessThanIgnoreCase (lhs : []const u8 , rhs : []const u8 ) bool {
421
600
return orderIgnoreCase (lhs , rhs ) == .lt ;
422
601
}
0 commit comments