Skip to content

Commit f6c9c46

Browse files
authored
Merge pull request #1982 from CosmWasm/one-engine-per-module
Store engine together with Module to mitigate memory increase issue
2 parents 34914b1 + 1b110c6 commit f6c9c46

File tree

7 files changed

+206
-124
lines changed

7 files changed

+206
-124
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@ and this project adheres to
66

77
## [2.0.0-beta.0] - 2023-12-21
88

9+
### Fixed
10+
11+
- cosmwasm-vm: Fix memory increase issue (1.3 -> 1.4 regression) by avoiding the
12+
use of a long running Wasmer Engine. ([#1978])
13+
14+
[#1978]: https://github.com/CosmWasm/cosmwasm/issues/1978
15+
916
### Added
1017

1118
- cosmwasm-std: Add `SubMsg:reply_never` constructor ([#1929])

packages/vm/src/cache.rs

Lines changed: 53 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::marker::PhantomData;
55
use std::path::{Path, PathBuf};
66
use std::str::FromStr;
77
use std::sync::Mutex;
8-
use wasmer::{Engine, Store};
8+
use wasmer::{Module, Store};
99

1010
use cosmwasm_std::Checksum;
1111

@@ -19,7 +19,7 @@ use crate::modules::{CachedModule, FileSystemCache, InMemoryCache, PinnedMemoryC
1919
use crate::parsed_wasm::ParsedWasm;
2020
use crate::size::Size;
2121
use crate::static_analysis::{Entrypoint, ExportInfo, REQUIRED_IBC_EXPORTS};
22-
use crate::wasm_backend::{compile, make_compiling_engine, make_runtime_engine};
22+
use crate::wasm_backend::{compile, make_compiling_engine};
2323

2424
const STATE_DIR: &str = "state";
2525
// Things related to the state of the blockchain.
@@ -91,24 +91,14 @@ pub struct CacheInner {
9191
memory_cache: InMemoryCache,
9292
fs_cache: FileSystemCache,
9393
stats: Stats,
94-
/// A single engine to execute all contracts in this cache instance (usually
95-
/// this means all contracts in the process).
96-
///
97-
/// This engine is headless, i.e. does not contain a Singlepass compiler.
98-
/// It only executes modules compiled with other engines.
99-
///
100-
/// The engine has one memory limit set which is the same for all contracts
101-
/// running with it. If different memory limits would be needed for different
102-
/// contracts at some point, we'd need multiple engines. This is because the tunables
103-
/// that control the limit are attached to the engine.
104-
runtime_engine: Engine,
10594
}
10695

10796
pub struct Cache<A: BackendApi, S: Storage, Q: Querier> {
10897
/// Available capabilities are immutable for the lifetime of the cache,
10998
/// i.e. any number of read-only references is allowed to access it concurrently.
11099
available_capabilities: HashSet<String>,
111100
inner: Mutex<CacheInner>,
101+
instance_memory_limit: Size,
112102
// Those two don't store data but only fix type information
113103
type_api: PhantomData<A>,
114104
type_storage: PhantomData<S>,
@@ -169,8 +159,8 @@ where
169159
memory_cache: InMemoryCache::new(memory_cache_size),
170160
fs_cache,
171161
stats: Stats::default(),
172-
runtime_engine: make_runtime_engine(Some(instance_memory_limit)),
173162
}),
163+
instance_memory_limit,
174164
type_storage: PhantomData::<S>,
175165
type_api: PhantomData::<A>,
176166
type_querier: PhantomData::<Q>,
@@ -318,11 +308,12 @@ where
318308
// for a not-so-relevant use case.
319309

320310
// Try to get module from file system cache
321-
if let Some((module, module_size)) = cache.fs_cache.load(checksum, &cache.runtime_engine)? {
311+
if let Some(cached_module) = cache
312+
.fs_cache
313+
.load(checksum, Some(self.instance_memory_limit))?
314+
{
322315
cache.stats.hits_fs_cache = cache.stats.hits_fs_cache.saturating_add(1);
323-
return cache
324-
.pinned_memory_cache
325-
.store(checksum, module, module_size);
316+
return cache.pinned_memory_cache.store(checksum, cached_module);
326317
}
327318

328319
// Re-compile from original Wasm bytecode
@@ -337,16 +328,16 @@ where
337328
}
338329

339330
// This time we'll hit the file-system cache.
340-
let Some((module, module_size)) = cache.fs_cache.load(checksum, &cache.runtime_engine)?
331+
let Some(cached_module) = cache
332+
.fs_cache
333+
.load(checksum, Some(self.instance_memory_limit))?
341334
else {
342335
return Err(VmError::generic_err(
343336
"Can't load module from file system cache after storing it to file system cache (pin)",
344337
));
345338
};
346339

347-
cache
348-
.pinned_memory_cache
349-
.store(checksum, module, module_size)
340+
cache.pinned_memory_cache.store(checksum, cached_module)
350341
}
351342

352343
/// Unpins a Module, i.e. removes it from the pinned memory cache.
@@ -370,10 +361,10 @@ where
370361
backend: Backend<A, S, Q>,
371362
options: InstanceOptions,
372363
) -> VmResult<Instance<A, S, Q>> {
373-
let (cached, store) = self.get_module(checksum)?;
364+
let (module, store) = self.get_module(checksum)?;
374365
let instance = Instance::from_module(
375366
store,
376-
&cached.module,
367+
&module,
377368
backend,
378369
options.gas_limit,
379370
None,
@@ -385,36 +376,49 @@ where
385376
/// Returns a module tied to a previously saved Wasm.
386377
/// Depending on availability, this is either generated from a memory cache, file system cache or Wasm code.
387378
/// This is part of `get_instance` but pulled out to reduce the locking time.
388-
fn get_module(&self, checksum: &Checksum) -> VmResult<(CachedModule, Store)> {
379+
fn get_module(&self, checksum: &Checksum) -> VmResult<(Module, Store)> {
389380
let mut cache = self.inner.lock().unwrap();
390381
// Try to get module from the pinned memory cache
391382
if let Some(element) = cache.pinned_memory_cache.load(checksum)? {
392383
cache.stats.hits_pinned_memory_cache =
393384
cache.stats.hits_pinned_memory_cache.saturating_add(1);
394-
let store = Store::new(cache.runtime_engine.clone());
395-
return Ok((element, store));
385+
let CachedModule {
386+
module,
387+
engine,
388+
size_estimate: _,
389+
} = element;
390+
let store = Store::new(engine);
391+
return Ok((module, store));
396392
}
397393

398394
// Get module from memory cache
399395
if let Some(element) = cache.memory_cache.load(checksum)? {
400396
cache.stats.hits_memory_cache = cache.stats.hits_memory_cache.saturating_add(1);
401-
let store = Store::new(cache.runtime_engine.clone());
402-
return Ok((element, store));
397+
let CachedModule {
398+
module,
399+
engine,
400+
size_estimate: _,
401+
} = element;
402+
let store = Store::new(engine);
403+
return Ok((module, store));
403404
}
404405

405406
// Get module from file system cache
406-
if let Some((module, module_size)) = cache.fs_cache.load(checksum, &cache.runtime_engine)? {
407+
if let Some(cached_module) = cache
408+
.fs_cache
409+
.load(checksum, Some(self.instance_memory_limit))?
410+
{
407411
cache.stats.hits_fs_cache = cache.stats.hits_fs_cache.saturating_add(1);
408412

409-
cache
410-
.memory_cache
411-
.store(checksum, module.clone(), module_size)?;
412-
let cached = CachedModule {
413+
cache.memory_cache.store(checksum, cached_module.clone())?;
414+
415+
let CachedModule {
413416
module,
414-
size_estimate: module_size,
415-
};
416-
let store = Store::new(cache.runtime_engine.clone());
417-
return Ok((cached, store));
417+
engine,
418+
size_estimate: _,
419+
} = cached_module;
420+
let store = Store::new(engine);
421+
return Ok((module, store));
418422
}
419423

420424
// Re-compile module from wasm
@@ -433,21 +437,23 @@ where
433437
}
434438

435439
// This time we'll hit the file-system cache.
436-
let Some((module, module_size)) = cache.fs_cache.load(checksum, &cache.runtime_engine)?
440+
let Some(cached_module) = cache
441+
.fs_cache
442+
.load(checksum, Some(self.instance_memory_limit))?
437443
else {
438444
return Err(VmError::generic_err(
439445
"Can't load module from file system cache after storing it to file system cache (get_module)",
440446
));
441447
};
442-
cache
443-
.memory_cache
444-
.store(checksum, module.clone(), module_size)?;
445-
let cached = CachedModule {
448+
cache.memory_cache.store(checksum, cached_module.clone())?;
449+
450+
let CachedModule {
446451
module,
447-
size_estimate: module_size,
448-
};
449-
let store = Store::new(cache.runtime_engine.clone());
450-
Ok((cached, store))
452+
engine,
453+
size_estimate: _,
454+
} = cached_module;
455+
let store = Store::new(engine);
456+
Ok((module, store))
451457
}
452458
}
453459

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,30 @@
1-
use wasmer::Module;
1+
use wasmer::{Engine, Module};
2+
3+
/// Some manual tests on Simon's machine showed that Engine is roughly 3-5 KB big,
4+
/// so give it a constant 10 KiB estimate.
5+
#[inline]
6+
pub fn engine_size_estimate() -> usize {
7+
10 * 1024
8+
}
29

310
#[derive(Debug, Clone)]
411
pub struct CachedModule {
512
pub module: Module,
13+
/// The runtime engine to run this module. Ideally we could use a single engine
14+
/// for all modules but the memory issue described in <https://github.com/wasmerio/wasmer/issues/4377>
15+
/// requires using one engine per module as a workaround.
16+
pub engine: Engine,
617
/// The estimated size of this element in memory.
718
/// Since the cached modules are just [rkyv](https://rkyv.org/) dumps of the Module
819
/// instances, we use the file size of the module on disk (not the Wasm!)
920
/// as an estimate for this.
10-
/// Note: Since CosmWasm 1.4 (Wasmer 4), Store/Engine are not cached anymore.
11-
/// The majority of the Module size is the Artifact.
21+
///
22+
/// Between CosmWasm 1.4 (Wasmer 4) and 1.5.2, Store/Engine were not cached. This lead to a
23+
/// memory consumption problem. From 1.5.2 on, Module and Engine are cached and Store is created
24+
/// from Engine on demand.
25+
///
26+
/// The majority of the Module size is the Artifact which is why we use the module filesize as the estimate.
27+
/// Some manual tests on Simon's machine showed that Engine is roughly 3-5 KB big, so give it a constant
28+
/// estimate: [`engine_size_estimate`].
1229
pub size_estimate: usize,
1330
}

packages/vm/src/modules/file_system_cache.rs

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,18 @@ use std::io;
44
use std::path::{Path, PathBuf};
55
use thiserror::Error;
66

7-
use wasmer::{AsEngineRef, DeserializeError, Module, Target};
7+
use wasmer::{DeserializeError, Module, Target};
88

99
use cosmwasm_std::Checksum;
1010

1111
use crate::errors::{VmError, VmResult};
1212
use crate::filesystem::mkdir_p;
1313
use crate::modules::current_wasmer_module_version;
14+
use crate::wasm_backend::make_runtime_engine;
15+
use crate::Size;
16+
17+
use super::cached_module::engine_size_estimate;
18+
use super::CachedModule;
1419

1520
/// Bump this version whenever the module system changes in a way
1621
/// that old stored modules would be corrupt when loaded in the new system.
@@ -129,24 +134,29 @@ impl FileSystemCache {
129134
path
130135
}
131136

132-
/// Loads a serialized module from the file system and returns a module (i.e. artifact + store),
133-
/// along with the size of the serialized module.
137+
/// Loads a serialized module from the file system and returns a Module + Engine,
138+
/// along with a size estimation for the pair.
134139
pub fn load(
135140
&self,
136141
checksum: &Checksum,
137-
engine: &impl AsEngineRef,
138-
) -> VmResult<Option<(Module, usize)>> {
142+
memory_limit: Option<Size>,
143+
) -> VmResult<Option<CachedModule>> {
139144
let file_path = self.module_file(checksum);
140145

146+
let engine = make_runtime_engine(memory_limit);
141147
let result = if self.unchecked_modules {
142-
unsafe { Module::deserialize_from_file_unchecked(engine, &file_path) }
148+
unsafe { Module::deserialize_from_file_unchecked(&engine, &file_path) }
143149
} else {
144-
unsafe { Module::deserialize_from_file(engine, &file_path) }
150+
unsafe { Module::deserialize_from_file(&engine, &file_path) }
145151
};
146152
match result {
147153
Ok(module) => {
148154
let module_size = module_size(&file_path)?;
149-
Ok(Some((module, module_size)))
155+
Ok(Some(CachedModule {
156+
module,
157+
engine,
158+
size_estimate: module_size + engine_size_estimate(),
159+
}))
150160
}
151161
Err(DeserializeError::Io(err)) => match err.kind() {
152162
io::ErrorKind::NotFound => Ok(None),
@@ -225,7 +235,7 @@ mod tests {
225235
use super::*;
226236
use crate::{
227237
size::Size,
228-
wasm_backend::{compile, make_compiling_engine, make_runtime_engine},
238+
wasm_backend::{compile, make_compiling_engine},
229239
};
230240
use tempfile::TempDir;
231241
use wasmer::{imports, Instance as WasmerInstance, Store};
@@ -252,8 +262,7 @@ mod tests {
252262
let checksum = Checksum::generate(&wasm);
253263

254264
// Module does not exist
255-
let runtime_engine = make_runtime_engine(TESTING_MEMORY_LIMIT);
256-
let cached = cache.load(&checksum, &runtime_engine).unwrap();
265+
let cached = cache.load(&checksum, TESTING_MEMORY_LIMIT).unwrap();
257266
assert!(cached.is_none());
258267

259268
// Store module
@@ -262,14 +271,21 @@ mod tests {
262271
cache.store(&checksum, &module).unwrap();
263272

264273
// Load module
265-
let cached = cache.load(&checksum, &runtime_engine).unwrap();
274+
let cached = cache.load(&checksum, TESTING_MEMORY_LIMIT).unwrap();
266275
assert!(cached.is_some());
267276

268277
// Check the returned module is functional.
269278
// This is not really testing the cache API but better safe than sorry.
270279
{
271-
let (cached_module, module_size) = cached.unwrap();
272-
assert_eq!(module_size, module.serialize().unwrap().len());
280+
let CachedModule {
281+
module: cached_module,
282+
engine: runtime_engine,
283+
size_estimate,
284+
} = cached.unwrap();
285+
assert_eq!(
286+
size_estimate,
287+
module.serialize().unwrap().len() + 10240 /* engine size estimate */
288+
);
273289
let import_object = imports! {};
274290
let mut store = Store::new(runtime_engine);
275291
let instance = WasmerInstance::new(&mut store, &cached_module, &import_object).unwrap();
@@ -314,20 +330,25 @@ mod tests {
314330
let checksum = Checksum::generate(&wasm);
315331

316332
// Store module
317-
let engine1 = make_compiling_engine(TESTING_MEMORY_LIMIT);
318-
let module = compile(&engine1, &wasm).unwrap();
333+
let compiling_engine = make_compiling_engine(TESTING_MEMORY_LIMIT);
334+
let module = compile(&compiling_engine, &wasm).unwrap();
319335
cache.store(&checksum, &module).unwrap();
320336

321337
// It's there
322-
let engine2 = make_runtime_engine(TESTING_MEMORY_LIMIT);
323-
assert!(cache.load(&checksum, &engine2).unwrap().is_some());
338+
assert!(cache
339+
.load(&checksum, TESTING_MEMORY_LIMIT)
340+
.unwrap()
341+
.is_some());
324342

325343
// Remove module
326344
let existed = cache.remove(&checksum).unwrap();
327345
assert!(existed);
328346

329347
// it's gone now
330-
assert!(cache.load(&checksum, &engine2).unwrap().is_none());
348+
assert!(cache
349+
.load(&checksum, TESTING_MEMORY_LIMIT)
350+
.unwrap()
351+
.is_none());
331352

332353
// Remove again
333354
let existed = cache.remove(&checksum).unwrap();

0 commit comments

Comments
 (0)