Skip to content

Commit a0236c8

Browse files
committed
Update utilities: add custom line tokenization
1 parent 0ee2749 commit a0236c8

File tree

3 files changed

+37
-7
lines changed

3 files changed

+37
-7
lines changed

build_all.sh

Lines changed: 2 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -5,7 +5,8 @@ for dir in *; do
55
if [ -d $dir -a -f "$dir/Cargo.toml" -a -f "$dir/src/lib.rs" ]; then
66
(
77
cd $dir
8-
cargo build --quiet
8+
cargo update
9+
cargo check --quiet
910
if [ $? != 0 ]; then
1011
echo "Build failed for $dir"
1112
rv=$?

util/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "util"
3-
version = "0.1.0"
3+
version = "0.2.0"
44
authors = ["Peter Goodspeed-Niklaus <[email protected]>"]
55

66
[dependencies]

util/src/lib.rs

Lines changed: 34 additions & 5 deletions
Original file line number · Diff line number · Diff line change
@@ -25,13 +25,29 @@ pub fn read_file(filename: &str) -> String {
2525
///
2626
/// Empty lines and empty whitespace are discarded
2727
pub fn tokenize<'a>(input: &'a str) -> Tokenized<&'a str> {
28+
tokenize_by(input, |line| {
29+
line.split_whitespace()
30+
.filter(|ref token| !token.is_empty())
31+
.collect::<Vec<_>>()
32+
})
33+
}
34+
35+
/// Use a given tokenization function to convert a string to a `Vec<Vec<&str>>`
36+
///
37+
/// The outer vector contains the lines of the string, split by newlines
38+
/// The inner vector contains the tokens of the string, split by the supplied tokenization function
39+
///
40+
/// Empty lines are discarded
41+
pub fn tokenize_by<'a, LineTokenizer>(
42+
input: &'a str,
43+
line_tokenizer: LineTokenizer,
44+
) -> Tokenized<&'a str>
45+
where
46+
LineTokenizer: Fn(&'a str) -> Vec<&'a str>,
47+
{
2848
input
2949
.lines()
30-
.map(|line| {
31-
line.split_whitespace()
32-
.filter(|ref token| !token.is_empty())
33-
.collect::<Vec<_>>()
34-
})
50+
.map(line_tokenizer)
3551
.filter(|ref line| !line.is_empty())
3652
.collect()
3753
}
@@ -64,6 +80,19 @@ where
6480
parse_as(&tokenize(&read_file(filename)))
6581
}
6682

83+
/// Read the file whose path is given, tokenize it by the given line tokenization function,
84+
/// and parse the tokens as the specified type
85+
pub fn file_as_by<Output, LineTokenizer>(
86+
filename: &str,
87+
line_tokenizer: LineTokenizer,
88+
) -> ParsedTokens<Output>
89+
where
90+
LineTokenizer: Fn(&str) -> Vec<&str>,
91+
Output: FromStr,
92+
{
93+
parse_as(&tokenize_by(&read_file(filename), line_tokenizer))
94+
}
95+
6796
/// Convert a `Vec<Vec<T>>` -> `Vec<T>`
6897
///
6998
/// For each row of the outer vector, if the inner vector is not empty, takes the first item.

0 commit comments

Comments (0)