Skip to content

Commit 07116df

Browse files
committed
Merge #1270: Improve performance of bdk_file_store::EntryIter
51bd01b fix(file_store): recover file offset after read (志宇) 66dc34e refactor(file_store): Use BufReader but simplify (LLFourn) c871764 test(file_store): `last_write_is_short` (志宇) a3aa8b6 feat(file_store)!: optimize `EntryIter` by reducing syscalls (志宇) Pull request description: ### Description `EntryIter` performance is improved by reducing syscalls. The underlying file reader is wrapped with `BufReader` (to reduce calls to `read` and `seek`). Two new tests are introduced. One ensures correct behavior when the last changeset write is too short. The other ensures the next write position is correct after a short read. ### Notes to the reviewers This is extracted from #1172 as suggested by #1172 (review). ### Changelog notice Changed * `EntryIter` performance is improved by reducing syscalls. ### Checklists #### All Submissions: * [x] I've signed all my commits * [x] I followed the [contribution guidelines](https://github.com/bitcoindevkit/bdk/blob/master/CONTRIBUTING.md) * [x] I ran `cargo fmt` and `cargo clippy` before committing #### New Features: * [x] I've added tests for the new feature * [x] I've added docs for the new feature ACKs for top commit: LLFourn: ACK 51bd01b Tree-SHA512: 9c25f9f2032cb2d551f3fe4ac62b856ceeb69a388f037b34674af366c55629a2eaa2b90b1ae4fbd425415ea8d02f44493a6c643b4b1a57f4507e87aa7ade3736
2 parents 48b28e3 + 51bd01b commit 07116df

File tree

2 files changed

+158
-34
lines changed

2 files changed

+158
-34
lines changed

crates/file_store/src/entry_iter.rs

Lines changed: 39 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use bincode::Options;
22
use std::{
33
fs::File,
4-
io::{self, Seek},
4+
io::{self, BufReader, Seek},
55
marker::PhantomData,
66
};
77

@@ -14,8 +14,9 @@ use crate::bincode_options;
1414
///
1515
/// [`next`]: Self::next
1616
pub struct EntryIter<'t, T> {
17-
db_file: Option<&'t mut File>,
18-
17+
/// Buffered reader around the file
18+
db_file: BufReader<&'t mut File>,
19+
finished: bool,
1920
/// The file position for the first read of `db_file`.
2021
start_pos: Option<u64>,
2122
types: PhantomData<T>,
@@ -24,8 +25,9 @@ pub struct EntryIter<'t, T> {
2425
impl<'t, T> EntryIter<'t, T> {
2526
pub fn new(start_pos: u64, db_file: &'t mut File) -> Self {
2627
Self {
27-
db_file: Some(db_file),
28+
db_file: BufReader::new(db_file),
2829
start_pos: Some(start_pos),
30+
finished: false,
2931
types: PhantomData,
3032
}
3133
}
@@ -38,44 +40,44 @@ where
3840
type Item = Result<T, IterError>;
3941

4042
fn next(&mut self) -> Option<Self::Item> {
41-
// closure which reads a single entry starting from `self.pos`
42-
let read_one = |f: &mut File, start_pos: Option<u64>| -> Result<Option<T>, IterError> {
43-
let pos = match start_pos {
44-
Some(pos) => f.seek(io::SeekFrom::Start(pos))?,
45-
None => f.stream_position()?,
46-
};
43+
if self.finished {
44+
return None;
45+
}
46+
(|| {
47+
if let Some(start) = self.start_pos.take() {
48+
self.db_file.seek(io::SeekFrom::Start(start))?;
49+
}
4750

48-
match bincode_options().deserialize_from(&*f) {
49-
Ok(changeset) => {
50-
f.stream_position()?;
51-
Ok(Some(changeset))
52-
}
51+
let pos_before_read = self.db_file.stream_position()?;
52+
match bincode_options().deserialize_from(&mut self.db_file) {
53+
Ok(changeset) => Ok(Some(changeset)),
5354
Err(e) => {
55+
self.finished = true;
56+
let pos_after_read = self.db_file.stream_position()?;
57+
// allow unexpected EOF if 0 bytes were read
5458
if let bincode::ErrorKind::Io(inner) = &*e {
55-
if inner.kind() == io::ErrorKind::UnexpectedEof {
56-
let eof = f.seek(io::SeekFrom::End(0))?;
57-
if pos == eof {
58-
return Ok(None);
59-
}
59+
if inner.kind() == io::ErrorKind::UnexpectedEof
60+
&& pos_after_read == pos_before_read
61+
{
62+
return Ok(None);
6063
}
6164
}
62-
f.seek(io::SeekFrom::Start(pos))?;
65+
self.db_file.seek(io::SeekFrom::Start(pos_before_read))?;
6366
Err(IterError::Bincode(*e))
6467
}
6568
}
66-
};
67-
68-
let result = read_one(self.db_file.as_mut()?, self.start_pos.take());
69-
if result.is_err() {
70-
self.db_file = None;
71-
}
72-
result.transpose()
69+
})()
70+
.transpose()
7371
}
7472
}
7573

76-
impl From<io::Error> for IterError {
77-
fn from(value: io::Error) -> Self {
78-
IterError::Io(value)
74+
impl<'t, T> Drop for EntryIter<'t, T> {
75+
fn drop(&mut self) {
76+
// This syncs the underlying file's offset with the buffer's position. This way, we
77+
// maintain the correct position to start the next read/write.
78+
if let Ok(pos) = self.db_file.stream_position() {
79+
let _ = self.db_file.get_mut().seek(io::SeekFrom::Start(pos));
80+
}
7981
}
8082
}
8183

@@ -97,4 +99,10 @@ impl core::fmt::Display for IterError {
9799
}
98100
}
99101

102+
impl From<io::Error> for IterError {
103+
fn from(value: io::Error) -> Self {
104+
IterError::Io(value)
105+
}
106+
}
107+
100108
impl std::error::Error for IterError {}

crates/file_store/src/store.rs

Lines changed: 119 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ mod test {
219219

220220
use bincode::DefaultOptions;
221221
use std::{
222+
collections::BTreeSet,
222223
io::{Read, Write},
223224
vec::Vec,
224225
};
@@ -228,7 +229,7 @@ mod test {
228229
const TEST_MAGIC_BYTES: [u8; TEST_MAGIC_BYTES_LEN] =
229230
[98, 100, 107, 102, 115, 49, 49, 49, 49, 49, 49, 49];
230231

231-
type TestChangeSet = Vec<String>;
232+
type TestChangeSet = BTreeSet<String>;
232233

233234
#[derive(Debug)]
234235
struct TestTracker;
@@ -253,7 +254,7 @@ mod test {
253254
fn open_or_create_new() {
254255
let temp_dir = tempfile::tempdir().unwrap();
255256
let file_path = temp_dir.path().join("db_file");
256-
let changeset = vec!["hello".to_string(), "world".to_string()];
257+
let changeset = BTreeSet::from(["hello".to_string(), "world".to_string()]);
257258

258259
{
259260
let mut db = Store::<TestChangeSet>::open_or_create_new(&TEST_MAGIC_BYTES, &file_path)
@@ -304,7 +305,7 @@ mod test {
304305
let mut data = [255_u8; 2000];
305306
data[..TEST_MAGIC_BYTES_LEN].copy_from_slice(&TEST_MAGIC_BYTES);
306307

307-
let changeset = vec!["one".into(), "two".into(), "three!".into()];
308+
let changeset = TestChangeSet::from(["one".into(), "two".into(), "three!".into()]);
308309

309310
let mut file = NamedTempFile::new().unwrap();
310311
file.write_all(&data).expect("should write");
@@ -340,4 +341,119 @@ mod test {
340341

341342
assert_eq!(got_bytes, expected_bytes);
342343
}
344+
345+
#[test]
346+
fn last_write_is_short() {
347+
let temp_dir = tempfile::tempdir().unwrap();
348+
349+
let changesets = [
350+
TestChangeSet::from(["1".into()]),
351+
TestChangeSet::from(["2".into(), "3".into()]),
352+
TestChangeSet::from(["4".into(), "5".into(), "6".into()]),
353+
];
354+
let last_changeset = TestChangeSet::from(["7".into(), "8".into(), "9".into()]);
355+
let last_changeset_bytes = bincode_options().serialize(&last_changeset).unwrap();
356+
357+
for short_write_len in 1..last_changeset_bytes.len() - 1 {
358+
let file_path = temp_dir.path().join(format!("{}.dat", short_write_len));
359+
println!("Test file: {:?}", file_path);
360+
361+
// simulate creating a file, writing data where the last write is incomplete
362+
{
363+
let mut db =
364+
Store::<TestChangeSet>::create_new(&TEST_MAGIC_BYTES, &file_path).unwrap();
365+
for changeset in &changesets {
366+
db.append_changeset(changeset).unwrap();
367+
}
368+
// this is the incomplete write
369+
db.db_file
370+
.write_all(&last_changeset_bytes[..short_write_len])
371+
.unwrap();
372+
}
373+
374+
// load file again and aggregate changesets
375+
// write the last changeset again (this time it succeeds)
376+
{
377+
let mut db = Store::<TestChangeSet>::open(&TEST_MAGIC_BYTES, &file_path).unwrap();
378+
let err = db
379+
.aggregate_changesets()
380+
.expect_err("should return error as last read is short");
381+
assert_eq!(
382+
err.changeset,
383+
changesets.iter().cloned().reduce(|mut acc, cs| {
384+
Append::append(&mut acc, cs);
385+
acc
386+
}),
387+
"should recover all changesets that are written in full",
388+
);
389+
db.db_file.write_all(&last_changeset_bytes).unwrap();
390+
}
391+
392+
// load file again - this time we should successfully aggregate all changesets
393+
{
394+
let mut db = Store::<TestChangeSet>::open(&TEST_MAGIC_BYTES, &file_path).unwrap();
395+
let aggregated_changesets = db
396+
.aggregate_changesets()
397+
.expect("aggregating all changesets should succeed");
398+
assert_eq!(
399+
aggregated_changesets,
400+
changesets
401+
.iter()
402+
.cloned()
403+
.chain(core::iter::once(last_changeset.clone()))
404+
.reduce(|mut acc, cs| {
405+
Append::append(&mut acc, cs);
406+
acc
407+
}),
408+
"should recover all changesets",
409+
);
410+
}
411+
}
412+
}
413+
414+
#[test]
415+
fn write_after_short_read() {
416+
let temp_dir = tempfile::tempdir().unwrap();
417+
418+
let changesets = (0..20)
419+
.map(|n| TestChangeSet::from([format!("{}", n)]))
420+
.collect::<Vec<_>>();
421+
let last_changeset = TestChangeSet::from(["last".into()]);
422+
423+
for read_count in 0..changesets.len() {
424+
let file_path = temp_dir.path().join(format!("{}.dat", read_count));
425+
println!("Test file: {:?}", file_path);
426+
427+
// First, we create the file with all the changesets!
428+
let mut db = Store::<TestChangeSet>::create_new(&TEST_MAGIC_BYTES, &file_path).unwrap();
429+
for changeset in &changesets {
430+
db.append_changeset(changeset).unwrap();
431+
}
432+
drop(db);
433+
434+
// We re-open the file and read `read_count` number of changesets.
435+
let mut db = Store::<TestChangeSet>::open(&TEST_MAGIC_BYTES, &file_path).unwrap();
436+
let mut exp_aggregation = db
437+
.iter_changesets()
438+
.take(read_count)
439+
.map(|r| r.expect("must read valid changeset"))
440+
.fold(TestChangeSet::default(), |mut acc, v| {
441+
Append::append(&mut acc, v);
442+
acc
443+
});
444+
// We write after a short read.
445+
db.write_changes(&last_changeset)
446+
.expect("last write must succeed");
447+
Append::append(&mut exp_aggregation, last_changeset.clone());
448+
drop(db);
449+
450+
// We open the file again and check whether aggregate changeset is expected.
451+
let aggregation = Store::<TestChangeSet>::open(&TEST_MAGIC_BYTES, &file_path)
452+
.unwrap()
453+
.aggregate_changesets()
454+
.expect("must aggregate changesets")
455+
.unwrap_or_default();
456+
assert_eq!(aggregation, exp_aggregation);
457+
}
458+
}
343459
}

0 commit comments

Comments
 (0)