Skip to content

Commit 2129c23

Browse files
perf: Add new efficient APIs read_unsafe and read_to_vec (#248)
I found that a significant source of performance loss is the `read` method of `Memory`. The `read` method takes a mutable buffer, which it fills with values read from the stable memory. According to Rust's rules, the buffer passed to `read` must be initialized before it is passed in (buffers containing uninitialized values are unsound and can cause UB). The usual pattern is to create a properly sized `Vec`, e.g. by using `vec![0; size]` or `vec.resize(size, 0)`, and pass that to `read`. However, initializing the bytes with values that get overwritten by `read` is only necessary in order to be sound, and it requires a significant number of instructions. This PR introduces a new method `read_unsafe`, which allows passing in a raw pointer and a `count` parameter. Implementations can be more efficient by reading directly and skipping initialization. This can lead to instruction reductions of up to 40%. The PR also introduces a helper method `read_to_vec`, which is a safe wrapper around `read_unsafe` for the most common use case: reading into a `Vec`. Clients can, for example, pass an empty `Vec` and profit from the extra efficiency without having to call unsafe methods. --------- Co-authored-by: Andriy Berestovskyy <[email protected]>
1 parent 0b510c0 commit 2129c23

13 files changed

+305
-160
lines changed

canbench_results.yml

+106-106
Large diffs are not rendered by default.

src/base_vec.rs

+4-5
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@
3333
//! bytes required to represent integers up to that max size.
3434
use crate::storable::{bounds, bytes_to_store_size_bounded};
3535
use crate::{
36-
read_u32, read_u64, safe_write, write, write_u32, write_u64, Address, GrowFailed, Memory,
37-
Storable,
36+
read_to_vec, read_u32, read_u64, safe_write, write, write_u32, write_u64, Address, GrowFailed,
37+
Memory, Storable,
3838
};
3939
use std::borrow::{Borrow, Cow};
4040
use std::cmp::min;
@@ -245,11 +245,10 @@ impl<T: Storable, M: Memory> BaseVec<T, M> {
245245
}
246246

247247
/// Reads the item at the specified index without any bound checks.
248-
fn read_entry_to(&self, index: u64, buf: &mut std::vec::Vec<u8>) {
248+
fn read_entry_to(&self, index: u64, buf: &mut Vec<u8>) {
249249
let offset = DATA_OFFSET + slot_size::<T>() as u64 * index;
250250
let (data_offset, data_size) = self.read_entry_size(offset);
251-
buf.resize(data_size, 0);
252-
self.memory.read(data_offset, &mut buf[..]);
251+
read_to_vec(&self.memory, data_offset.into(), buf, data_size);
253252
}
254253

255254
/// Sets the vector's length.

src/btreemap/node.rs

+9-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::{
22
btreemap::Allocator,
3-
read_struct, read_u32, read_u64,
3+
read_struct, read_to_vec, read_u32, read_u64,
44
storable::Storable,
55
types::{Address, Bytes},
66
write, write_struct, write_u32, Memory,
@@ -190,7 +190,7 @@ impl<K: Storable + Ord + Clone> Node<K> {
190190
value.take_or_load(|offset| self.load_value_from_memory(offset, memory))
191191
}
192192

193-
/// Loads a value from stable memory at the given offset.
193+
/// Loads a value from stable memory at the given offset of this node.
194194
fn load_value_from_memory<M: Memory>(&self, offset: Bytes, memory: &M) -> Vec<u8> {
195195
let reader = NodeReader {
196196
address: self.address,
@@ -200,8 +200,13 @@ impl<K: Storable + Ord + Clone> Node<K> {
200200
};
201201

202202
let value_len = read_u32(&reader, Address::from(offset.get())) as usize;
203-
let mut bytes = vec![0; value_len];
204-
reader.read((offset + U32_SIZE).get(), &mut bytes);
203+
let mut bytes = vec![];
204+
read_to_vec(
205+
&reader,
206+
Address::from((offset + U32_SIZE).get()),
207+
&mut bytes,
208+
value_len,
209+
);
205210

206211
bytes
207212
}

src/btreemap/node/io.rs

+22-10
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,21 @@ pub struct NodeReader<'a, M: Memory> {
1818
// Note: The `Memory` interface is implemented so that helper methods such as `read_u32`,
1919
// `read_struct`, etc. can be used with a `NodeReader` directly.
2020
impl<'a, M: Memory> Memory for NodeReader<'a, M> {
21-
fn read(&self, offset: u64, dst: &mut [u8]) {
21+
unsafe fn read_unsafe(&self, offset: u64, dst: *mut u8, count: usize) {
2222
// If the read is only in the initial page, then read it directly in one go.
2323
// This is a performance enhancement to avoid the cost of creating a `NodeIterator`.
24-
if (offset + dst.len() as u64) < self.page_size.get() as u64 {
25-
self.memory.read(self.address.get() + offset, dst);
24+
if (offset + count as u64) < self.page_size.get() as u64 {
25+
self.memory
26+
.read_unsafe(self.address.get() + offset, dst, count);
2627
return;
2728
}
2829

29-
// The read is split across several pages. Create a `NodeIterator` to to read from
30+
// The read is split across several pages. Create a `NodeIterator` to read from
3031
// each of the individual pages.
3132
let iter = NodeIterator::new(
3233
VirtualSegment {
3334
address: Address::from(offset),
34-
length: Bytes::from(dst.len() as u64),
35+
length: Bytes::from(count as u64),
3536
},
3637
Bytes::from(self.page_size.get()),
3738
);
@@ -43,22 +44,33 @@ impl<'a, M: Memory> Memory for NodeReader<'a, M> {
4344
length,
4445
} in iter
4546
{
47+
// SAFETY: read_unsafe() is safe to call iff bytes_read + length <= count since the
48+
// caller guarantees that we can write `count` number of bytes to `dst`.
49+
assert!(bytes_read + length.get() as usize <= count);
4650
if page_idx == 0 {
47-
self.memory.read(
51+
self.memory.read_unsafe(
4852
(self.address + offset).get(),
49-
&mut dst[bytes_read as usize..(bytes_read + length.get()) as usize],
53+
dst.add(bytes_read),
54+
length.get() as usize,
5055
);
5156
} else {
52-
self.memory.read(
57+
self.memory.read_unsafe(
5358
(self.overflows[page_idx - 1] + offset).get(),
54-
&mut dst[bytes_read as usize..(bytes_read + length.get()) as usize],
59+
dst.add(bytes_read),
60+
length.get() as usize,
5561
);
5662
}
5763

58-
bytes_read += length.get();
64+
bytes_read += length.get() as usize;
5965
}
6066
}
6167

68+
#[inline]
69+
fn read(&self, offset: u64, dst: &mut [u8]) {
70+
// SAFETY: since dst is dst.len() long, it fulfills the safety requirements of read_unsafe.
71+
unsafe { self.read_unsafe(offset, dst.as_mut_ptr(), dst.len()) }
72+
}
73+
6274
fn write(&self, _: u64, _: &[u8]) {
6375
unreachable!("NodeReader does not support write")
6476
}

src/btreemap/node/v1.rs

+3-4
Original file line numberDiff line numberDiff line change
@@ -65,15 +65,14 @@ impl<K: Storable + Ord + Clone> Node<K> {
6565
// Load the entries.
6666
let mut keys_encoded_values = Vec::with_capacity(header.num_entries as usize);
6767
let mut offset = NodeHeader::size();
68-
let mut buf = Vec::with_capacity(max_key_size.max(max_value_size) as usize);
68+
let mut buf = vec![];
6969
for _ in 0..header.num_entries {
7070
// Read the key's size.
7171
let key_size = read_u32(memory, address + offset);
7272
offset += U32_SIZE;
7373

7474
// Read the key.
75-
buf.resize(key_size as usize, 0);
76-
memory.read((address + offset).get(), &mut buf);
75+
read_to_vec(memory, address + offset, &mut buf, key_size as usize);
7776
offset += Bytes::from(max_key_size);
7877
let key = K::from_bytes(Cow::Borrowed(&buf));
7978
// Values are loaded lazily. Store a reference and skip loading it.
@@ -86,7 +85,7 @@ impl<K: Storable + Ord + Clone> Node<K> {
8685
let mut children = vec![];
8786
if header.node_type == INTERNAL_NODE_TYPE {
8887
// The number of children is equal to the number of entries + 1.
89-
children.reserve(header.num_entries as usize + 1);
88+
children.reserve_exact(header.num_entries as usize + 1);
9089
for _ in 0..header.num_entries + 1 {
9190
let child = Address::from(read_u64(memory, address + offset));
9291
offset += Address::size();

src/btreemap/node/v2.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ impl<K: Storable + Ord + Clone> Node<K> {
141141
let mut children = vec![];
142142
if node_type == NodeType::Internal {
143143
// The number of children is equal to the number of entries + 1.
144+
children.reserve_exact(num_entries + 1);
144145
for _ in 0..num_entries + 1 {
145146
let child = Address::from(read_u64(&reader, offset));
146147
offset += Address::size();
@@ -164,8 +165,7 @@ impl<K: Storable + Ord + Clone> Node<K> {
164165
};
165166

166167
// Load the key.
167-
buf.resize(key_size as usize, 0);
168-
reader.read(offset.get(), &mut buf);
168+
read_to_vec(&reader, offset, &mut buf, key_size as usize);
169169
let key = K::from_bytes(Cow::Borrowed(&buf));
170170
offset += Bytes::from(key_size);
171171
keys_encoded_values.push((key, Value::by_ref(Bytes::from(0usize))));

src/cell.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! A serializable value stored in the stable memory.
22
use crate::storable::Storable;
3-
use crate::{Memory, WASM_PAGE_SIZE};
3+
use crate::{read_to_vec, Memory, WASM_PAGE_SIZE};
44
use std::borrow::{Borrow, Cow};
55
use std::fmt;
66

@@ -132,8 +132,8 @@ impl<T: Storable, M: Memory> Cell<T, M> {
132132
///
133133
/// PRECONDITION: memory is large enough to contain the value.
134134
fn read_value(memory: &M, len: u32) -> T {
135-
let mut buf = vec![0; len as usize];
136-
memory.read(HEADER_V1_SIZE, &mut buf);
135+
let mut buf = vec![];
136+
read_to_vec(memory, HEADER_V1_SIZE.into(), &mut buf, len as usize);
137137
T::from_bytes(Cow::Owned(buf))
138138
}
139139

src/ic0_memory.rs

+6
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ impl Memory for Ic0StableMemory {
3030
unsafe { stable64_read(dst.as_ptr() as u64, offset, dst.len() as u64) }
3131
}
3232

33+
#[inline]
34+
unsafe fn read_unsafe(&self, offset: u64, dst: *mut u8, count: usize) {
35+
// SAFETY: This is safe because of the ic0 api guarantees.
36+
stable64_read(dst as u64, offset, count as u64);
37+
}
38+
3339
#[inline]
3440
fn write(&self, offset: u64, src: &[u8]) {
3541
// SAFETY: This is safe because of the ic0 api guarantees.

src/lib.rs

+61-18
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ pub use file_mem::FileMemory;
2626
pub use ic0_memory::Ic0StableMemory;
2727
use std::error;
2828
use std::fmt::{Display, Formatter};
29+
use std::mem::MaybeUninit;
2930
pub use storable::Storable;
3031
use types::Address;
3132
pub use vec_mem::VectorMemory;
@@ -46,42 +47,81 @@ pub trait Memory {
4647
/// pages. (One WebAssembly page is 64Ki bytes.)
4748
fn size(&self) -> u64;
4849

49-
/// Tries to grow the memory by new_pages many pages containing
50+
/// Tries to grow the memory by `pages` many pages containing
5051
/// zeroes. If successful, returns the previous size of the
5152
/// memory (in pages). Otherwise, returns -1.
5253
fn grow(&self, pages: u64) -> i64;
5354

54-
/// Copies the data referred to by offset out of the stable memory
55-
/// and replaces the corresponding bytes in dst.
55+
/// Copies the data referred to by `offset` out of the stable memory
56+
/// and replaces the corresponding bytes in `dst`.
5657
fn read(&self, offset: u64, dst: &mut [u8]);
5758

58-
/// Copies the data referred to by src and replaces the
59+
/// Copies `count` number of bytes of the data starting from `offset` out of the stable memory
60+
/// into the buffer starting at `dst`.
61+
///
62+
/// This method is an alternative to `read` which does not require initializing a buffer and may
63+
/// therefore be faster.
64+
///
65+
/// # Safety
66+
///
67+
/// Callers must guarantee that
68+
/// * it is valid to write `count` number of bytes starting from `dst`,
69+
/// * `dst..dst + count` does not overlap with `self`.
70+
///
71+
/// Implementations must guarantee that before the method returns, `count` number of bytes
72+
/// starting from `dst` will be initialized.
73+
#[inline]
74+
unsafe fn read_unsafe(&self, offset: u64, dst: *mut u8, count: usize) {
75+
// Initialize the buffer to make the slice valid.
76+
std::ptr::write_bytes(dst, 0, count);
77+
let slice = std::slice::from_raw_parts_mut(dst, count);
78+
self.read(offset, slice)
79+
}
80+
81+
/// Copies the data referred to by `src` and replaces the
5982
/// corresponding segment starting at offset in the stable memory.
6083
fn write(&self, offset: u64, src: &[u8]);
6184
}
6285

63-
// A helper function that reads a single 32bit integer encoded as
64-
// little-endian from the specified memory at the specified offset.
86+
/// Copies `count` bytes of data starting from `addr` out of the stable memory into `dst`.
87+
///
88+
/// Callers are allowed to pass vectors in any state (e.g. empty vectors).
89+
/// After the method returns, `dst.len() == count`.
90+
/// This method is an alternative to `read` which does not require initializing a buffer and may
91+
/// therefore be faster.
92+
#[inline]
93+
fn read_to_vec<M: Memory>(m: &M, addr: Address, dst: &mut std::vec::Vec<u8>, count: usize) {
94+
dst.clear();
95+
dst.reserve_exact(count);
96+
unsafe {
97+
m.read_unsafe(addr.get(), dst.as_mut_ptr(), count);
98+
// SAFETY: read_unsafe guarantees to initialize the first `count` bytes
99+
dst.set_len(count);
100+
}
101+
}
102+
103+
/// A helper function that reads a single 32bit integer encoded as
104+
/// little-endian from the specified memory at the specified offset.
65105
fn read_u32<M: Memory>(m: &M, addr: Address) -> u32 {
66106
let mut buf: [u8; 4] = [0; 4];
67107
m.read(addr.get(), &mut buf);
68108
u32::from_le_bytes(buf)
69109
}
70110

71-
// A helper function that reads a single 64bit integer encoded as
72-
// little-endian from the specified memory at the specified offset.
111+
/// A helper function that reads a single 64bit integer encoded as
112+
/// little-endian from the specified memory at the specified offset.
73113
fn read_u64<M: Memory>(m: &M, addr: Address) -> u64 {
74114
let mut buf: [u8; 8] = [0; 8];
75115
m.read(addr.get(), &mut buf);
76116
u64::from_le_bytes(buf)
77117
}
78118

79-
// Writes a single 32-bit integer encoded as little-endian.
119+
/// Writes a single 32-bit integer encoded as little-endian.
80120
fn write_u32<M: Memory>(m: &M, addr: Address, val: u32) {
81121
write(m, addr.get(), &val.to_le_bytes());
82122
}
83123

84-
// Writes a single 64-bit integer encoded as little-endian.
124+
/// Writes a single 64-bit integer encoded as little-endian.
85125
fn write_u64<M: Memory>(m: &M, addr: Address, val: u64) {
86126
write(m, addr.get(), &val.to_le_bytes());
87127
}
@@ -148,17 +188,20 @@ fn write<M: Memory>(memory: &M, offset: u64, bytes: &[u8]) {
148188
}
149189
}
150190

151-
// Reads a struct from memory.
191+
/// Reads a struct from memory.
152192
fn read_struct<T, M: Memory>(addr: Address, memory: &M) -> T {
153-
let mut t: T = unsafe { core::mem::zeroed() };
154-
let t_slice = unsafe {
155-
core::slice::from_raw_parts_mut(&mut t as *mut _ as *mut u8, core::mem::size_of::<T>())
156-
};
157-
memory.read(addr.get(), t_slice);
158-
t
193+
let mut value = MaybeUninit::<T>::uninit();
194+
unsafe {
195+
memory.read_unsafe(
196+
addr.get(),
197+
value.as_mut_ptr() as *mut u8,
198+
core::mem::size_of::<T>(),
199+
);
200+
value.assume_init()
201+
}
159202
}
160203

161-
// Writes a struct to memory.
204+
/// Writes a struct to memory.
162205
fn write_struct<T, M: Memory>(t: &T, addr: Address, memory: &M) {
163206
let slice = unsafe {
164207
core::slice::from_raw_parts(t as *const _ as *const u8, core::mem::size_of::<T>())

src/log.rs

+2-3
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
//! ----------------------------------------
5555
//! Unallocated space
5656
//! ```
57-
use crate::{read_u64, safe_write, write_u64, Address, GrowFailed, Memory, Storable};
57+
use crate::{read_to_vec, read_u64, safe_write, write_u64, Address, GrowFailed, Memory, Storable};
5858
use std::borrow::Cow;
5959
use std::cell::RefCell;
6060
use std::fmt;
@@ -331,8 +331,7 @@ impl<T: Storable, INDEX: Memory, DATA: Memory> Log<T, INDEX, DATA> {
331331
/// ignores the result.
332332
pub fn read_entry(&self, idx: u64, buf: &mut Vec<u8>) -> Result<(), NoSuchEntry> {
333333
let (offset, len) = self.entry_meta(idx).ok_or(NoSuchEntry)?;
334-
buf.resize(len, 0);
335-
self.data_memory.read(HEADER_OFFSET + offset, buf);
334+
read_to_vec(&self.data_memory, (HEADER_OFFSET + offset).into(), buf, len);
336335
Ok(())
337336
}
338337

src/memory_manager.rs

+10-5
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
//! assert_eq!(bytes, vec![4, 5, 6]);
4242
//! ```
4343
use crate::{
44-
read_struct,
44+
read_struct, read_to_vec,
4545
types::{Address, Bytes},
4646
write, write_struct, Memory, WASM_PAGE_SIZE,
4747
};
@@ -239,9 +239,9 @@ impl<M: Memory> MemoryManagerInner<M> {
239239
}
240240

241241
// Check if the magic in the memory corresponds to this object.
242-
let mut dst = vec![0; 3];
242+
let mut dst = [0; 3];
243243
memory.read(0, &mut dst);
244-
if dst != MAGIC {
244+
if &dst != MAGIC {
245245
// No memory manager found. Create a new instance.
246246
MemoryManagerInner::new(memory, bucket_size_in_pages)
247247
} else {
@@ -277,8 +277,13 @@ impl<M: Memory> MemoryManagerInner<M> {
277277
assert_eq!(&header.magic, MAGIC, "Bad magic.");
278278
assert_eq!(header.version, LAYOUT_VERSION, "Unsupported version.");
279279

280-
let mut buckets = vec![0; MAX_NUM_BUCKETS as usize];
281-
memory.read(bucket_allocations_address(BucketId(0)).get(), &mut buckets);
280+
let mut buckets = vec![];
281+
read_to_vec(
282+
&memory,
283+
bucket_allocations_address(BucketId(0)),
284+
&mut buckets,
285+
MAX_NUM_BUCKETS as usize,
286+
);
282287

283288
let mut memory_buckets = BTreeMap::new();
284289
for (bucket_idx, memory) in buckets.into_iter().enumerate() {

0 commit comments

Comments
 (0)