@@ -25,13 +25,29 @@ pub fn read_file(filename: &str) -> String {
 ///
 /// Empty lines and empty whitespace are discarded
 pub fn tokenize<'a>(input: &'a str) -> Tokenized<&'a str> {
+    tokenize_by(input, |line| {
+        line.split_whitespace()
+            .filter(|ref token| !token.is_empty())
+            .collect::<Vec<_>>()
+    })
+}
+
+/// Use a given tokenization function to convert a string to a `Vec<Vec<&str>>`
+///
+/// The outer vector contains the lines of the string, split by newlines
+/// The inner vector contains the tokens of each line, split by the supplied tokenization function
+///
+/// Empty lines are discarded
+pub fn tokenize_by<'a, LineTokenizer>(
+    input: &'a str,
+    line_tokenizer: LineTokenizer,
+) -> Tokenized<&'a str>
+where
+    LineTokenizer: Fn(&'a str) -> Vec<&'a str>,
+{
     input
         .lines()
-        .map(|line| {
-            line.split_whitespace()
-                .filter(|ref token| !token.is_empty())
-                .collect::<Vec<_>>()
-        })
+        .map(line_tokenizer)
         .filter(|ref line| !line.is_empty())
         .collect()
 }
@@ -64,6 +80,19 @@
     parse_as(&tokenize(&read_file(filename)))
 }
 
+/// Read the file whose path is given, tokenize it by the given line tokenization function,
+/// and parse the tokens as the specified type
+pub fn file_as_by<Output, LineTokenizer>(
+    filename: &str,
+    line_tokenizer: LineTokenizer,
+) -> ParsedTokens<Output>
+where
+    LineTokenizer: Fn(&str) -> Vec<&str>,
+    Output: FromStr,
+{
+    parse_as(&tokenize_by(&read_file(filename), line_tokenizer))
+}
+
 /// Convert a `Vec<Vec<T>>` -> `Vec<T>`
 ///
 /// For each row of the outer vector, if the inner vector is not empty, takes the first item.
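
A minimal, self-contained sketch of how the new `tokenize_by` might be exercised with a non-whitespace tokenizer. The `Tokenized` alias is not visible in this diff, so the sketch assumes `type Tokenized<T> = Vec<Vec<T>>`, matching the `Vec<Vec<&str>>` described in the doc comment; the function body is copied from the patch. `file_as_by` composes the same call with `read_file` and `parse_as`, exactly as the second hunk shows.

```rust
// Assumption: the crate's `Tokenized` alias is `Vec<Vec<T>>`, per the
// doc comment in the patch; the real definition may differ.
type Tokenized<T> = Vec<Vec<T>>;

// Body copied from the patched `tokenize_by` in the diff above.
fn tokenize_by<'a, LineTokenizer>(
    input: &'a str,
    line_tokenizer: LineTokenizer,
) -> Tokenized<&'a str>
where
    LineTokenizer: Fn(&'a str) -> Vec<&'a str>,
{
    input
        .lines()
        .map(line_tokenizer)
        .filter(|line| !line.is_empty())
        .collect()
}

fn main() {
    // Split on commas instead of the whitespace splitting that
    // `tokenize` hard-codes; the blank middle line is discarded.
    let input = "1,2,3\n\n4,5,6";
    let tokens = tokenize_by(input, |line| {
        line.split(',').filter(|token| !token.is_empty()).collect()
    });
    assert_eq!(tokens, vec![vec!["1", "2", "3"], vec!["4", "5", "6"]]);
    println!("{:?}", tokens);
}
```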