Skip to content

Commit

Permalink
feat: when deserialization of module from filesystem cache fails, rem…
Browse files Browse the repository at this point in the history
…ove from cache (#138)

* feat: remove in-memory SerializedModuleCache, move logic to perist serialized modules to filesystem into ModuleCache

* chore: cleaner docs

* chore: fmt + clippy

* chore: rename fn for clarity

* chore: clearer code

* feat: when deserialization of module from filesystem cache fails, remove from cache

* chore: comment clarity

Co-authored-by: ThetaSinner <[email protected]>

* chore: wording

Co-authored-by: ThetaSinner <[email protected]>

* chore: link to github discussion

Co-authored-by: Jost <[email protected]>

* chore: clearer simpler naming without redundant 'Serialized' and 'Deserialized'

* refactor: separate concerns ModuleBuilder from ModuleCache

* feat: tracing log when deleting file fails, re-build wasm and save to both caches after filesystem deserialization failure

* test: use TempDir so dir is deleted on Drop

---------

Co-authored-by: ThetaSinner <[email protected]>
Co-authored-by: Jost <[email protected]>
  • Loading branch information
3 people authored Jan 7, 2025
1 parent 4961063 commit dbbda5b
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 47 deletions.
8 changes: 7 additions & 1 deletion crates/common/src/result.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ pub enum WasmErrorInner {
/// Wasmer failed to build a Module from wasm byte code.
/// With the feature `wasmer_sys` enabled, building a Module includes compiling the wasm.
ModuleBuild(String),
/// Wasmer failed to serialize a Module to bytes.
ModuleSerialize(String),
/// Wasmer failed to deserialize a Module from bytes.
ModuleDeserialize(String),
/// The host failed to call a function in the guest.
CallError(String),
}
Expand All @@ -49,8 +53,10 @@ impl WasmErrorInner {
| Self::ErrorWhileError
// Bad memory is bad memory.
| Self::Memory
// Failing to build the wasmer Module means we cannot use it.
// Failing to build, serialize, or deserialize the wasmer Module means we cannot use it.
| Self::ModuleBuild(_)
| Self::ModuleSerialize(_)
| Self::ModuleDeserialize(_)
// This is ambiguous so best to treat as potentially corrupt.
| Self::CallError(_)
=> true,
Expand Down
107 changes: 70 additions & 37 deletions crates/host/src/module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,52 +192,76 @@ impl ModuleCache {
match self.get_from_filesystem(key) {
// Filesystem cache hit, deserialize and save to cache
Ok(Some(serialized_module)) => {
let module = self.builder.from_serialized_module(serialized_module)?;
let module_result = self.builder.from_serialized_module(serialized_module);

// If deserialization fails, we assume the file is corrupt,
// so it is removed from the filesystem cache,
// and the wasm is re-added to the cache again.
let module = match module_result {
Ok(d) => Ok(d),
Err(_) => {
// Remove from filesystem
if let Err(e) = self.remove_from_filesystem(key) {
tracing::debug!("Failed to remove cached wasm from filesystem with cache key {:?}: {:?}", key, e);
}

// Build wasm and save to both caches
self.add_to_cache_and_filesystem(key, wasm)
}
}?;

self.add_to_cache(key, module.clone());

Ok(module)
}

// Filesystem cache miss, build wasm and save to both caches
_ => {
// Each module needs to be compiled with a new engine because
// of middleware like metering. Middleware is compiled into the
// module once and available in all instances created from it.
let module = self.builder.from_binary(wasm)?;

// Round trip the wasmer Module through serialization.
//
// A new middleware per module is required, hence a new engine
// per module is needed too. Serialization allows for uncoupling
// the module from the engine that was used for compilation.
// After that another engine can be used to deserialize the
// module again. The engine has to live as long as the module to
// prevent memory access out of bounds errors.
//
// This procedure facilitates caching of modules that can be
// instantiated with fresh stores free from state. Instance
// creation is highly performant which makes caching of instances
// and stores unnecessary.
//
// See https://github.com/wasmerio/wasmer/discussions/3829#discussioncomment-5790763
let serialized_module = module
.serialize()
.map_err(|e| wasm_error!(WasmErrorInner::ModuleBuild(e.to_string())))?;
let module = self
.builder
.from_serialized_module(serialized_module.clone())?;

// Save serialized module to filesystem cache
self.add_to_filesystem(key, serialized_module)?;

// Save module to in-memory cache
self.add_to_cache(key, module.clone());

Ok(module)
}
_ => self.add_to_cache_and_filesystem(key, wasm),
}
}

/// Build a wasm, then save it to both the in-memory cache and filesystem
fn add_to_cache_and_filesystem(
&self,
key: CacheKey,
wasm: &[u8],
) -> Result<Arc<Module>, wasmer::RuntimeError> {
// Each module needs to be compiled with a new engine because
// of middleware like metering. Middleware is compiled into the
// module once and available in all instances created from it.
let module = self.builder.from_binary(wasm)?;

// Round trip the wasmer Module through serialization.
//
// A new middleware per module is required, hence a new engine
// per module is needed too. Serialization allows for uncoupling
// the module from the engine that was used for compilation.
// After that another engine can be used to deserialize the
// module again. The engine has to live as long as the module to
// prevent memory access out of bounds errors.
//
// This procedure facilitates caching of modules that can be
// instantiated with fresh stores free from state. Instance
// creation is highly performant which makes caching of instances
// and stores unnecessary.
//
// See https://github.com/wasmerio/wasmer/discussions/3829#discussioncomment-5790763
let serialized_module = module
.serialize()
.map_err(|e| wasm_error!(WasmErrorInner::ModuleBuild(e.to_string())))?;
let module = self
.builder
.from_serialized_module(serialized_module.clone())?;

// Save serialized module to filesystem cache
self.add_to_filesystem(key, serialized_module)?;

// Save module to in-memory cache
self.add_to_cache(key, module.clone());

Ok(module)
}

/// Get serialized module from filesystem
fn get_from_filesystem(&self, key: CacheKey) -> Result<Option<Bytes>, wasmer::RuntimeError> {
self.filesystem_module_path(key)
Expand Down Expand Up @@ -297,6 +321,15 @@ impl ModuleCache {
Ok(())
}

// Remove serialized module from filesystem cache
fn remove_from_filesystem(&self, key: CacheKey) -> Result<(), std::io::Error> {
if let Some(fs_path) = self.filesystem_module_path(key) {
std::fs::remove_file(fs_path)?;
}

Ok(())
}

/// Check cache for module
fn get_from_cache(&self, key: CacheKey) -> Option<Arc<Module>> {
let mut cache = self.cache.write();
Expand Down
72 changes: 63 additions & 9 deletions crates/host/src/module/wasmer_sys.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ mod tests {
use super::make_engine;
use crate::module::{builder::ModuleBuilder, CacheKey, ModuleCache, PlruCache};
use std::io::Write;
use tempfile::tempdir;
use tempfile::TempDir;
use wasmer::Module;

#[test]
Expand All @@ -95,9 +95,10 @@ mod tests {
0x61, 0x64, 0x64, 0x5f, 0x6f, 0x6e, 0x65, 0x02, 0x07, 0x01, 0x00, 0x01, 0x00, 0x02,
0x70, 0x30,
];
let tmp_fs_cache_dir = tempdir().unwrap().into_path();
let tmp_fs_cache_dir = TempDir::new().unwrap();
let module_builder = ModuleBuilder::new(make_engine);
let module_cache = ModuleCache::new(module_builder, Some(tmp_fs_cache_dir.clone()));
let module_cache =
ModuleCache::new(module_builder, Some(tmp_fs_cache_dir.path().to_owned()));
assert!(module_cache
.filesystem_path
.clone()
Expand All @@ -123,8 +124,6 @@ mod tests {
module_cache.filesystem_path.unwrap().join(hex::encode(key));
assert!(std::fs::metadata(serialized_module_path).is_ok());
}

std::fs::remove_dir_all(tmp_fs_cache_dir).unwrap();
}

#[test]
Expand Down Expand Up @@ -165,17 +164,18 @@ mod tests {
0x61, 0x64, 0x64, 0x5f, 0x6f, 0x6e, 0x65, 0x02, 0x07, 0x01, 0x00, 0x01, 0x00, 0x02,
0x70, 0x30,
];
let tmp_fs_cache_dir = tempdir().unwrap().into_path();
let tmp_fs_cache_dir = TempDir::new().unwrap();
let module_builder = ModuleBuilder::new(make_engine);
let module_cache = ModuleCache::new(module_builder, Some(tmp_fs_cache_dir.clone()));
let module_cache =
ModuleCache::new(module_builder, Some(tmp_fs_cache_dir.path().to_owned()));
let key: CacheKey = [0u8; 32];

// Build module, serialize, save directly to filesystem
let compiler_engine = make_engine();
let module =
std::sync::Arc::new(Module::from_binary(&compiler_engine, wasm.as_slice()).unwrap());
let serialized_module = module.serialize().unwrap();
let serialized_module_path = tmp_fs_cache_dir.clone().join(hex::encode(key));
let serialized_module_path = tmp_fs_cache_dir.path().join(hex::encode(key));
let mut file = std::fs::OpenOptions::new()
.write(true)
.create_new(true)
Expand All @@ -198,7 +198,61 @@ mod tests {
*module.serialize().unwrap()
);
}
}

std::fs::remove_dir_all(tmp_fs_cache_dir).unwrap();
#[test]
fn cache_get_from_fs_corrupt() {
// simple example wasm taken from wasmer docs
// https://docs.rs/wasmer/latest/wasmer/struct.Module.html#example
let wasm: Vec<u8> = vec![
0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x06, 0x01, 0x60, 0x01, 0x7f,
0x01, 0x7f, 0x03, 0x02, 0x01, 0x00, 0x07, 0x0b, 0x01, 0x07, 0x61, 0x64, 0x64, 0x5f,
0x6f, 0x6e, 0x65, 0x00, 0x00, 0x0a, 0x09, 0x01, 0x07, 0x00, 0x20, 0x00, 0x41, 0x01,
0x6a, 0x0b, 0x00, 0x1a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x01, 0x0a, 0x01, 0x00, 0x07,
0x61, 0x64, 0x64, 0x5f, 0x6f, 0x6e, 0x65, 0x02, 0x07, 0x01, 0x00, 0x01, 0x00, 0x02,
0x70, 0x30,
];

// Bad serialized_wasm
let bad_serialized_wasm = vec![0x00];

let tmp_fs_cache_dir = TempDir::new().unwrap();
let module_builder = ModuleBuilder::new(make_engine);
let module_cache =
ModuleCache::new(module_builder, Some(tmp_fs_cache_dir.path().to_owned()));
let key: CacheKey = [0u8; 32];

// Build module, serialize, save directly to filesystem
let serialized_module_path = tmp_fs_cache_dir.path().join(hex::encode(key));
let mut file = std::fs::OpenOptions::new()
.write(true)
.create_new(true)
.open(&serialized_module_path)
.unwrap();
file.write_all(&bad_serialized_wasm).unwrap();

// Module can still be retrieved from fs cache, as it has been deleted from the filesystem and re-added to the cache
let res = module_cache.get(key, &wasm);
assert!(res.is_ok());

let compiler_engine = make_engine();
let module =
std::sync::Arc::new(Module::from_binary(&compiler_engine, wasm.as_slice()).unwrap());

// make sure module is stored in deserialized cache
{
let deserialized_cached_module = module_cache.cache.write().get_item(&key).unwrap();
assert_eq!(
*deserialized_cached_module.serialize().unwrap(),
*module.serialize().unwrap()
);
}

// make sure module has been stored in serialized filesystem cache
{
let serialized_module_path =
module_cache.filesystem_path.unwrap().join(hex::encode(key));
assert!(std::fs::metadata(serialized_module_path).is_ok());
}
}
}

0 comments on commit dbbda5b

Please sign in to comment.