-
Notifications
You must be signed in to change notification settings - Fork 11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
refactor: reinstate module cache #106
Changes from all commits
4ca1d96
74e689b
f64a928
55f7b99
e6679d1
5eb7ce4
7f4ce38
d663a56
29409f7
febc87c
7c39692
3aa836d
6351ca2
1624030
033be03
b1e5afa
fb54445
830dee7
00ac4ab
0047757
307fe6a
5183d21
e22e562
80c7c24
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
[workspace] | ||
members = ["crates/host", "crates/common"] | ||
exclude = ["test"] | ||
|
||
resolver = "2" | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,19 +6,30 @@ use bytes::Bytes; | |
use bytes::BytesMut; | ||
use holochain_wasmer_common::WasmError; | ||
use parking_lot::Mutex; | ||
use parking_lot::RwLock; | ||
use std::collections::BTreeMap; | ||
use std::fs::File; | ||
use std::fs::OpenOptions; | ||
use std::io::Write; | ||
use std::path::PathBuf; | ||
use std::str::FromStr; | ||
use std::sync::Arc; | ||
use tracing::info; | ||
use wasmer::wasmparser; | ||
use wasmer::BaseTunables; | ||
use wasmer::CompileError; | ||
use wasmer::CompilerConfig; | ||
use wasmer::CpuFeature; | ||
use wasmer::Cranelift; | ||
use wasmer::DeserializeError; | ||
use wasmer::Engine; | ||
use wasmer::Instance; | ||
use wasmer::Module; | ||
use wasmer::NativeEngineExt; | ||
use wasmer::Store; | ||
use wasmer::Target; | ||
use wasmer::Triple; | ||
use wasmer_middlewares::Metering; | ||
|
||
/// We expect cache keys to be produced via hashing so 32 bytes is enough for all | ||
/// purposes. | ||
|
@@ -119,6 +130,60 @@ pub trait PlruCache { | |
} | ||
} | ||
|
||
#[cfg(not(test))] | ||
/// one hundred giga ops | ||
pub const WASM_METERING_LIMIT: u64 = 100_000_000_000; | ||
|
||
#[cfg(test)] | ||
/// ten mega ops. | ||
/// We don't want tests to run forever, and it can take several minutes for 100 giga ops to run. | ||
pub const WASM_METERING_LIMIT: u64 = 10_000_000; | ||
|
||
/// Generate an engine with a wasm compiler | ||
/// and Metering (use limits) in place. | ||
pub fn make_compiler_engine() -> Engine { | ||
let cost_function = |_operator: &wasmparser::Operator| -> u64 { 1 }; | ||
// @todo 100 giga-ops is totally arbitrary cutoff so we probably | ||
// want to make the limit configurable somehow. | ||
let metering = Arc::new(Metering::new(WASM_METERING_LIMIT, cost_function)); | ||
// the only place where the wasm compiler engine is set | ||
let mut compiler = Cranelift::default(); | ||
compiler.canonicalize_nans(true).push_middleware(metering); | ||
Engine::from(compiler) | ||
} | ||
|
||
/// Generate a runtime `Engine` without compiler suitable for iOS. | ||
/// Useful for re-building an iOS Module from a preserialized WASM Module. | ||
pub fn make_ios_runtime_engine() -> Engine { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should these be behind a platform flag so they aren't available everywhere? Or are they valid anywhere but just recommended for iOS? Actually I think even in the second case, if they have ios in the name I'd be in favour of them being behind a platform condition There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. They haven't been behind a feature flag before inside There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Understood, I still think they probably ought to be but as this was just moved code it can wait for a less busy time :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another limiting factor is that where they're used in |
||
Engine::headless() | ||
} | ||
|
||
/// Take WASM binary and prepare a wasmer Module suitable for iOS | ||
pub fn build_ios_module(wasm: &[u8]) -> Result<Module, CompileError> { | ||
info!( | ||
"Found wasm and was instructed to serialize it for ios in wasmer format, doing so now..." | ||
); | ||
let compiler_engine = make_compiler_engine(); | ||
let store = Store::new(compiler_engine); | ||
Module::from_binary(&store, wasm) | ||
} | ||
|
||
/// Deserialize a previously compiled module for iOS from a file. | ||
pub fn get_ios_module_from_file(path: &PathBuf) -> Result<Module, DeserializeError> { | ||
let engine = make_ios_runtime_engine(); | ||
unsafe { Module::deserialize_from_file(&engine, path) } | ||
} | ||
|
||
/// Configuration of a Target for wasmer for iOS | ||
pub fn wasmer_ios_target() -> Target { | ||
// use what I see in | ||
// platform ios headless example | ||
// https://github.com/wasmerio/wasmer/blob/447c2e3a152438db67be9ef649327fabcad6f5b8/examples/platform_ios_headless.rs#L38-L53 | ||
let triple = Triple::from_str("aarch64-apple-ios").unwrap(); | ||
let cpu_feature = CpuFeature::set(); | ||
Target::new(triple, cpu_feature) | ||
} | ||
|
||
Comment on lines
+133
to
+186
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All these items come from |
||
/// Cache for serialized modules. These are fully compiled wasm modules that are | ||
/// then serialized by wasmer and can be cached. A serialized wasm module must still | ||
/// be deserialized before it can be used to build instances. The deserialization | ||
|
@@ -128,7 +193,12 @@ pub struct SerializedModuleCache { | |
pub plru: MicroCache, | ||
pub key_map: PlruKeyMap, | ||
pub cache: BTreeMap<CacheKey, Arc<SerializedModule>>, | ||
pub cranelift: fn() -> Cranelift, | ||
// a function to create a new compiler engine for every module | ||
pub make_compiler_engine: fn() -> Engine, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To keep this independent of the compiler, I changed it return an engine |
||
// the runtime engine has to live as long as the module; | ||
// keeping it in the cache and using it for all modules | ||
// make sure of that | ||
pub runtime_engine: Engine, | ||
pub maybe_fs_dir: Option<PathBuf>, | ||
} | ||
|
||
|
@@ -157,15 +227,21 @@ impl PlruCache for SerializedModuleCache { | |
} | ||
|
||
impl SerializedModuleCache { | ||
/// Build a default `SerializedModuleCache` with a `Cranelift` that will be used | ||
/// to compile modules for serialization as needed. | ||
pub fn default_with_cranelift(cranelift: fn() -> Cranelift) -> Self { | ||
/// Build a default `SerializedModuleCache` with a fn to create an `Engine` | ||
/// that will be used to compile modules from wasms as needed. | ||
pub fn default_with_engine( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. arguably this can be renamed to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds reasonable to me, is this a breaking change anyway? If so I'd go for the rename. Otherwise maybe leave a comment behind to do it later? |
||
make_compiler_engine: fn() -> Engine, | ||
maybe_fs_dir: Option<PathBuf>, | ||
) -> Self { | ||
Self { | ||
cranelift, | ||
make_compiler_engine, | ||
// the engine to execute function calls on instances does not | ||
// require a compiler | ||
runtime_engine: Engine::headless(), | ||
plru: MicroCache::default(), | ||
key_map: PlruKeyMap::default(), | ||
cache: BTreeMap::default(), | ||
maybe_fs_dir: None, | ||
maybe_fs_dir, | ||
} | ||
} | ||
|
||
|
@@ -175,27 +251,13 @@ impl SerializedModuleCache { | |
.map(|dir_path| dir_path.clone().join(hex::encode(key))) | ||
} | ||
|
||
fn store(&self) -> Store { | ||
let mut engine: Engine = (self.cranelift)().into(); | ||
// Workaround for invalid memory access on iOS. | ||
// https://github.com/holochain/holochain/issues/3096 | ||
engine.set_tunables(BaseTunables { | ||
static_memory_bound: 0x4000.into(), | ||
static_memory_offset_guard_size: 0x1_0000, | ||
dynamic_memory_offset_guard_size: 0x1_0000, | ||
}); | ||
Store::new(engine) | ||
} | ||
|
||
/// Given a wasm, compiles with cranelift, serializes the result, adds it to | ||
/// Given a wasm, compiles with compiler engine, serializes the result, adds it to | ||
/// the cache and returns that. | ||
fn get_with_build_cache( | ||
&mut self, | ||
key: CacheKey, | ||
wasm: &[u8], | ||
) -> Result<Arc<ModuleWithStore>, wasmer::RuntimeError> { | ||
let store = self.store(); | ||
|
||
) -> Result<Arc<Module>, wasmer::RuntimeError> { | ||
let maybe_module_path = self.module_path(key); | ||
let (module, serialized_module) = match maybe_module_path.as_ref().map(|module_path| { | ||
// We do this the long way to get `Bytes` instead of `Vec<u8>` so | ||
|
@@ -218,13 +280,27 @@ impl SerializedModuleCache { | |
})?; | ||
Ok::<bytes::Bytes, wasmer::RuntimeError>(bytes_mut.into_inner().freeze()) | ||
}) { | ||
Some(Ok(serialized_module)) => ( | ||
unsafe { Module::deserialize(&store, serialized_module.clone()) } | ||
.map_err(|e| wasm_error!(WasmErrorInner::Compile(e.to_string())))?, | ||
serialized_module, | ||
), | ||
Some(Ok(serialized_module)) => { | ||
let deserialized_module = | ||
unsafe { Module::deserialize(&self.runtime_engine, serialized_module.clone()) } | ||
.map_err(|e| wasm_error!(WasmErrorInner::Compile(e.to_string())))?; | ||
(deserialized_module, serialized_module) | ||
} | ||
// no serialized module found on the file system, so serialize the | ||
// wasm binary and write it to the file system | ||
_fs_miss => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is a file system miss the only option here? What about other errors in a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know this is existing code, just checking while we're here and looking at it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In case of a Some(Err) the err will be swallowed, something to improve I agree |
||
let module = Module::from_binary(&store, wasm) | ||
// Each module needs to be compiled with a new engine because | ||
// of middleware like metering. Middleware is compiled into the | ||
// module once and available in all instances created from it. | ||
let mut compiler_engine = (self.make_compiler_engine)(); | ||
// Workaround for invalid memory access on iOS. | ||
// https://github.com/holochain/holochain/issues/3096 | ||
compiler_engine.set_tunables(BaseTunables { | ||
static_memory_bound: 0x4000.into(), | ||
static_memory_offset_guard_size: 0x1_0000, | ||
dynamic_memory_offset_guard_size: 0x1_0000, | ||
}); | ||
let module = Module::from_binary(&compiler_engine, wasm) | ||
.map_err(|e| wasm_error!(WasmErrorInner::Compile(e.to_string())))?; | ||
let serialized_module = module | ||
.serialize() | ||
|
@@ -252,53 +328,61 @@ impl SerializedModuleCache { | |
} | ||
} | ||
|
||
// deserialize the module to be returned for instantiation | ||
// | ||
// A new middleware per module is required, hence a new engine | ||
// per module is needed too. Serialization allows for uncoupling | ||
// the module from the engine that was used for compilation. | ||
// After that another engine can be used to deserialize the | ||
// module again. The engine has to live as long as the module to | ||
// prevent memory access out of bounds errors. | ||
// | ||
// This procedure facilitates caching of modules that can be | ||
// instatiated with fresh stores free from state. Instance | ||
// creation is highly performant which makes caching of instances | ||
// and stores unnecessary. | ||
let module = unsafe { | ||
Module::deserialize(&self.runtime_engine, serialized_module.clone()) | ||
.map_err(|e| wasm_error!(WasmErrorInner::Compile(e.to_string())))? | ||
}; | ||
|
||
(module, serialized_module) | ||
} | ||
}; | ||
self.put_item(key, Arc::new(serialized_module.clone())); | ||
|
||
Ok(Arc::new(ModuleWithStore { | ||
store: Arc::new(Mutex::new(store)), | ||
module: Arc::new(module), | ||
})) | ||
Ok(Arc::new(module)) | ||
} | ||
|
||
/// Given a wasm, attempts to get the serialized module for it from the cache. | ||
/// If the cache misses a new serialized module, will be built from the wasm. | ||
pub fn get( | ||
&mut self, | ||
key: CacheKey, | ||
wasm: &[u8], | ||
) -> Result<Arc<ModuleWithStore>, wasmer::RuntimeError> { | ||
/// If the cache misses, a new serialized module will be built from the wasm. | ||
pub fn get(&mut self, key: CacheKey, wasm: &[u8]) -> Result<Arc<Module>, wasmer::RuntimeError> { | ||
match self.cache.get(&key) { | ||
Some(serialized_module) => { | ||
let store = self.store(); | ||
let module = unsafe { Module::deserialize(&store, (**serialized_module).clone()) } | ||
.map_err(|e| wasm_error!(WasmErrorInner::Compile(e.to_string())))?; | ||
let module = unsafe { | ||
Module::deserialize(&self.runtime_engine, (**serialized_module).clone()) | ||
} | ||
.map_err(|e| wasm_error!(WasmErrorInner::Compile(e.to_string())))?; | ||
self.touch(&key); | ||
Ok(Arc::new(ModuleWithStore { | ||
store: Arc::new(Mutex::new(store)), | ||
module: Arc::new(module), | ||
})) | ||
Ok(Arc::new(module)) | ||
} | ||
None => self.get_with_build_cache(key, wasm), | ||
} | ||
} | ||
} | ||
|
||
/// Caches wasm instances. Reusing wasm instances allows maximum speed in function | ||
/// calls but also introduces the possibility of memory corruption or other bad | ||
/// state that is inappropriate to persist/reuse/access across calls. It is the | ||
/// responsibility of the host to discard instances that are not eligible for reuse. | ||
/// Caches deserialized wasm modules. Deserialization of cached modules from | ||
/// the cache to create callable instances is slow. Therefore modules are | ||
/// cached in memory after deserialization. | ||
#[derive(Default, Debug)] | ||
pub struct InstanceCache { | ||
pub struct DeserializedModuleCache { | ||
plru: MicroCache, | ||
key_map: PlruKeyMap, | ||
cache: BTreeMap<CacheKey, Arc<InstanceWithStore>>, | ||
cache: BTreeMap<CacheKey, Arc<Module>>, | ||
} | ||
|
||
impl PlruCache for InstanceCache { | ||
type Item = InstanceWithStore; | ||
impl PlruCache for DeserializedModuleCache { | ||
type Item = Module; | ||
|
||
fn plru_mut(&mut self) -> &mut MicroCache { | ||
&mut self.plru | ||
|
@@ -320,3 +404,97 @@ impl PlruCache for InstanceCache { | |
&mut self.cache | ||
} | ||
} | ||
|
||
#[derive(Debug)] | ||
pub struct ModuleCache { | ||
serialized_module_cache: Arc<RwLock<SerializedModuleCache>>, | ||
deserialized_module_cache: Arc<RwLock<DeserializedModuleCache>>, | ||
} | ||
|
||
impl ModuleCache { | ||
pub fn new(maybe_fs_dir: Option<PathBuf>) -> Self { | ||
let serialized_module_cache = Arc::new(RwLock::new( | ||
SerializedModuleCache::default_with_engine(make_compiler_engine, maybe_fs_dir), | ||
)); | ||
let deserialized_module_cache = Arc::new(RwLock::new(DeserializedModuleCache::default())); | ||
ModuleCache { | ||
serialized_module_cache, | ||
deserialized_module_cache, | ||
} | ||
} | ||
|
||
pub fn get(&self, key: CacheKey, wasm: &[u8]) -> Result<Arc<Module>, wasmer::RuntimeError> { | ||
// check deserialized module cache first for module | ||
{ | ||
let mut deserialized_cache = self.deserialized_module_cache.write(); | ||
if let Some(module) = deserialized_cache.get_item(&key) { | ||
return Ok(module); | ||
} | ||
} | ||
// get module from serialized cache otherwise | ||
// if cache does not contain module, it will be built from wasm bytes | ||
// and then cached in serialized cache | ||
let module; | ||
{ | ||
let mut serialized_cache = self.serialized_module_cache.write(); | ||
module = serialized_cache.get(key, wasm)?; | ||
} | ||
// cache in deserialized module cache too | ||
{ | ||
let mut deserialized_cache = self.deserialized_module_cache.write(); | ||
deserialized_cache.put_item(key, module.clone()); | ||
} | ||
Ok(module) | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
pub mod tests { | ||
use crate::module::{CacheKey, ModuleCache, PlruCache}; | ||
|
||
#[test] | ||
fn cache_test() { | ||
// simple example wasm taken from wasmer docs | ||
// https://docs.rs/wasmer/latest/wasmer/struct.Module.html#example | ||
let wasm: Vec<u8> = vec![ | ||
0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x06, 0x01, 0x60, 0x01, 0x7f, | ||
0x01, 0x7f, 0x03, 0x02, 0x01, 0x00, 0x07, 0x0b, 0x01, 0x07, 0x61, 0x64, 0x64, 0x5f, | ||
0x6f, 0x6e, 0x65, 0x00, 0x00, 0x0a, 0x09, 0x01, 0x07, 0x00, 0x20, 0x00, 0x41, 0x01, | ||
0x6a, 0x0b, 0x00, 0x1a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x01, 0x0a, 0x01, 0x00, 0x07, | ||
0x61, 0x64, 0x64, 0x5f, 0x6f, 0x6e, 0x65, 0x02, 0x07, 0x01, 0x00, 0x01, 0x00, 0x02, | ||
0x70, 0x30, | ||
]; | ||
let module_cache = ModuleCache::new(None); | ||
assert_eq!( | ||
module_cache.serialized_module_cache.read().cache.is_empty(), | ||
true | ||
); | ||
assert_eq!( | ||
module_cache | ||
.deserialized_module_cache | ||
.read() | ||
.cache | ||
.is_empty(), | ||
true | ||
); | ||
|
||
let key: CacheKey = [0u8; 32].into(); | ||
let module = module_cache.get(key.clone(), &wasm).unwrap(); | ||
|
||
// make sure module has been stored in serialized cache under key | ||
{ | ||
let serialized_cached_module = | ||
module_cache.serialized_module_cache.write().get_item(&key); | ||
assert_eq!(matches!(serialized_cached_module, Some(_)), true); | ||
} | ||
// make sure module has been stored in deserialized cache under key | ||
{ | ||
let deserialized_cached_module = module_cache | ||
.deserialized_module_cache | ||
.write() | ||
.get_item(&key) | ||
.unwrap(); | ||
assert_eq!(*deserialized_cached_module, *module); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! Makes debugging feature problems so much easier