refactor: reinstate module cache (#106)
* chore: set resolver to v2

* refactor: reinstate module cache

* test: fix module caching

* chore: update host package version to 0.0.91

* test(host): add test for cache

* refactor: move all wasmer-types related items to holochain-wasmer

* refactor(host): change identifier names according to refactors

* docs: update changelog

* test: fix module creation

* test: comment out bench wasm_instance due to upstream memory leak

* test: comment out bench wasm_module due to upstream memory leak

* refactor: move ios precompiled module getter to holochain-wasmer

* refactor: rename precompiled module fn

* build: switch to llvm compiler

* revert switch to llvm

* chore: revert host crate version bump

* test: fix concurrent calls

* chore: add cargo lock

* revert some formatting

* comment bench

* update docs

* fix: use cache's runtime engine

* bring back ios workaround

* test: put module test behind test cfg
jost-s authored Jan 16, 2024
1 parent 973a599 commit c22964a
Showing 6 changed files with 303 additions and 74 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,11 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

- **BREAKING CHANGE:** The instance cache in the host crate has been removed in favor of a deserialized module cache, `DeserializedModuleCache`. An abstraction for caching serialized and deserialized modules, called `ModuleCache`, was added (see the sketch below).
- Refactor: All logic related to modules and wasmer caching has been moved from `holochain` to the host crate. Consequently, functions for wasmer development under iOS need to be imported from there.
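
A minimal usage sketch of the new `ModuleCache` (illustrative only; the import path `holochain_wasmer_host::module`, the placeholder key, and the per-call cache construction are assumptions — real keys are expected to be 32-byte hashes of the wasm, and a host would keep the cache long-lived):

```rust
use holochain_wasmer_host::module::{CacheKey, ModuleCache};
use std::sync::Arc;

fn load_module(wasm: &[u8]) -> Result<Arc<wasmer::Module>, wasmer::RuntimeError> {
    // No filesystem directory given: serialized modules are kept in memory only.
    let cache = ModuleCache::new(None);
    // Placeholder key; in practice this would be a hash of `wasm`.
    let key: CacheKey = [0u8; 32].into();
    // The first call compiles, serializes and caches the module; repeat calls
    // for the same key are served from the deserialized module cache.
    cache.get(key, wasm)
}
```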

## [0.0.90]

- Bump wasmer to 4.2.4
2 changes: 2 additions & 0 deletions Cargo.toml
@@ -1,3 +1,5 @@
[workspace]
members = ["crates/host", "crates/common"]
exclude = ["test"]

resolver = "2"
284 changes: 231 additions & 53 deletions crates/host/src/module.rs
@@ -6,19 +6,30 @@ use bytes::Bytes;
use bytes::BytesMut;
use holochain_wasmer_common::WasmError;
use parking_lot::Mutex;
use parking_lot::RwLock;
use std::collections::BTreeMap;
use std::fs::File;
use std::fs::OpenOptions;
use std::io::Write;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use tracing::info;
use wasmer::wasmparser;
use wasmer::BaseTunables;
use wasmer::CompileError;
use wasmer::CompilerConfig;
use wasmer::CpuFeature;
use wasmer::Cranelift;
use wasmer::DeserializeError;
use wasmer::Engine;
use wasmer::Instance;
use wasmer::Module;
use wasmer::NativeEngineExt;
use wasmer::Store;
use wasmer::Target;
use wasmer::Triple;
use wasmer_middlewares::Metering;

/// We expect cache keys to be produced via hashing so 32 bytes is enough for all
/// purposes.
@@ -119,6 +130,60 @@ pub trait PlruCache {
}
}

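// Illustrative sketch, not part of this commit: a cache key as described above
// could be derived by hashing the wasm bytes, e.g. with the `sha2` crate
// (used here purely for illustration; it is not a dependency of this crate).
fn example_key_for_wasm(wasm: &[u8]) -> CacheKey {
    use sha2::{Digest, Sha256};
    // SHA-256 yields exactly the 32 bytes expected for a cache key.
    let digest: [u8; 32] = Sha256::digest(wasm).into();
    digest.into()
}
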
#[cfg(not(test))]
/// one hundred giga ops
pub const WASM_METERING_LIMIT: u64 = 100_000_000_000;

#[cfg(test)]
/// ten mega ops.
/// We don't want tests to run forever, and it can take several minutes for 100 giga ops to run.
pub const WASM_METERING_LIMIT: u64 = 10_000_000;

/// Generate an engine with a wasm compiler
/// and Metering (use limits) in place.
pub fn make_compiler_engine() -> Engine {
let cost_function = |_operator: &wasmparser::Operator| -> u64 { 1 };
// @todo 100 giga-ops is a totally arbitrary cutoff, so we probably
// want to make the limit configurable somehow.
let metering = Arc::new(Metering::new(WASM_METERING_LIMIT, cost_function));
// the only place where the wasm compiler engine is set
let mut compiler = Cranelift::default();
compiler.canonicalize_nans(true).push_middleware(metering);
Engine::from(compiler)
}

/// Generate a runtime `Engine` without a compiler, suitable for iOS.
/// Useful for rebuilding an iOS Module from a preserialized WASM Module.
pub fn make_ios_runtime_engine() -> Engine {
Engine::headless()
}

/// Take a WASM binary and prepare a wasmer Module suitable for iOS
pub fn build_ios_module(wasm: &[u8]) -> Result<Module, CompileError> {
info!(
"Found wasm and was instructed to serialize it for ios in wasmer format, doing so now..."
);
let compiler_engine = make_compiler_engine();
let store = Store::new(compiler_engine);
Module::from_binary(&store, wasm)
}

/// Deserialize a previously compiled module for iOS from a file.
pub fn get_ios_module_from_file(path: &PathBuf) -> Result<Module, DeserializeError> {
let engine = make_ios_runtime_engine();
unsafe { Module::deserialize_from_file(&engine, path) }
}

/// Configuration of a Target for wasmer for iOS
pub fn wasmer_ios_target() -> Target {
// use the setup from the wasmer platform ios headless example
// https://github.com/wasmerio/wasmer/blob/447c2e3a152438db67be9ef649327fabcad6f5b8/examples/platform_ios_headless.rs#L38-L53
let triple = Triple::from_str("aarch64-apple-ios").unwrap();
let cpu_feature = CpuFeature::set();
Target::new(triple, cpu_feature)
}

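// Illustrative sketch, not part of this commit: the helpers above are intended
// to be used by serializing a compiled module to disk once and later rebuilding
// it without a compiler via the headless runtime engine. The file path is
// hypothetical.
fn example_ios_roundtrip(wasm: &[u8]) -> Result<Module, Box<dyn std::error::Error>> {
    let path = PathBuf::from("precompiled.module");
    // Compile and serialize ahead of time, where a compiler is available.
    let module = build_ios_module(wasm)?;
    std::fs::write(&path, module.serialize()?)?;
    // Later, rebuild the module without a compiler (as required on iOS).
    Ok(get_ios_module_from_file(&path)?)
}
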
/// Cache for serialized modules. These are fully compiled wasm modules that are
/// then serialized by wasmer and can be cached. A serialized wasm module must still
/// be deserialized before it can be used to build instances. The deserialization
@@ -128,7 +193,12 @@ pub struct SerializedModuleCache {
pub plru: MicroCache,
pub key_map: PlruKeyMap,
pub cache: BTreeMap<CacheKey, Arc<SerializedModule>>,
pub cranelift: fn() -> Cranelift,
// a function to create a new compiler engine for every module
pub make_compiler_engine: fn() -> Engine,
// the runtime engine has to live as long as the module;
// keeping it in the cache and using it for all modules
// ensures that
pub runtime_engine: Engine,
pub maybe_fs_dir: Option<PathBuf>,
}

@@ -157,15 +227,21 @@ impl PlruCache for SerializedModuleCache {
}

impl SerializedModuleCache {
/// Build a default `SerializedModuleCache` with a `Cranelift` that will be used
/// to compile modules for serialization as needed.
pub fn default_with_cranelift(cranelift: fn() -> Cranelift) -> Self {
/// Build a default `SerializedModuleCache` with a fn to create an `Engine`
/// that will be used to compile modules from wasms as needed.
pub fn default_with_engine(
make_compiler_engine: fn() -> Engine,
maybe_fs_dir: Option<PathBuf>,
) -> Self {
Self {
cranelift,
make_compiler_engine,
// the engine to execute function calls on instances does not
// require a compiler
runtime_engine: Engine::headless(),
plru: MicroCache::default(),
key_map: PlruKeyMap::default(),
cache: BTreeMap::default(),
maybe_fs_dir: None,
maybe_fs_dir,
}
}

@@ -175,27 +251,13 @@ impl SerializedModuleCache {
.map(|dir_path| dir_path.clone().join(hex::encode(key)))
}

fn store(&self) -> Store {
let mut engine: Engine = (self.cranelift)().into();
// Workaround for invalid memory access on iOS.
// https://github.com/holochain/holochain/issues/3096
engine.set_tunables(BaseTunables {
static_memory_bound: 0x4000.into(),
static_memory_offset_guard_size: 0x1_0000,
dynamic_memory_offset_guard_size: 0x1_0000,
});
Store::new(engine)
}

/// Given a wasm, compiles with cranelift, serializes the result, adds it to
/// Given a wasm, compiles it with the compiler engine, serializes the result, adds it to
/// the cache and returns that.
fn get_with_build_cache(
&mut self,
key: CacheKey,
wasm: &[u8],
) -> Result<Arc<ModuleWithStore>, wasmer::RuntimeError> {
let store = self.store();

) -> Result<Arc<Module>, wasmer::RuntimeError> {
let maybe_module_path = self.module_path(key);
let (module, serialized_module) = match maybe_module_path.as_ref().map(|module_path| {
// We do this the long way to get `Bytes` instead of `Vec<u8>` so
@@ -218,13 +280,27 @@ impl SerializedModuleCache {
})?;
Ok::<bytes::Bytes, wasmer::RuntimeError>(bytes_mut.into_inner().freeze())
}) {
Some(Ok(serialized_module)) => (
unsafe { Module::deserialize(&store, serialized_module.clone()) }
.map_err(|e| wasm_error!(WasmErrorInner::Compile(e.to_string())))?,
serialized_module,
),
Some(Ok(serialized_module)) => {
let deserialized_module =
unsafe { Module::deserialize(&self.runtime_engine, serialized_module.clone()) }
.map_err(|e| wasm_error!(WasmErrorInner::Compile(e.to_string())))?;
(deserialized_module, serialized_module)
}
// no serialized module found on the file system, so compile the wasm,
// serialize the resulting module and write it to the file system
_fs_miss => {
let module = Module::from_binary(&store, wasm)
// Each module needs to be compiled with a new engine because
// of middleware like metering. Middleware is compiled into the
// module once and available in all instances created from it.
let mut compiler_engine = (self.make_compiler_engine)();
// Workaround for invalid memory access on iOS.
// https://github.com/holochain/holochain/issues/3096
compiler_engine.set_tunables(BaseTunables {
static_memory_bound: 0x4000.into(),
static_memory_offset_guard_size: 0x1_0000,
dynamic_memory_offset_guard_size: 0x1_0000,
});
let module = Module::from_binary(&compiler_engine, wasm)
.map_err(|e| wasm_error!(WasmErrorInner::Compile(e.to_string())))?;
let serialized_module = module
.serialize()
@@ -252,53 +328,61 @@ impl SerializedModuleCache {
}
}

// deserialize the module to be returned for instantiation
//
// A new middleware per module is required, hence a new engine
// per module is needed too. Serialization allows for uncoupling
// the module from the engine that was used for compilation.
// After that another engine can be used to deserialize the
// module again. The engine has to live as long as the module to
// prevent memory access out of bounds errors.
//
// This procedure facilitates caching of modules that can be
// instantiated with fresh stores free from state. Instance
// creation is highly performant, which makes caching of instances
// and stores unnecessary.
let module = unsafe {
Module::deserialize(&self.runtime_engine, serialized_module.clone())
.map_err(|e| wasm_error!(WasmErrorInner::Compile(e.to_string())))?
};

(module, serialized_module)
}
};
self.put_item(key, Arc::new(serialized_module.clone()));

Ok(Arc::new(ModuleWithStore {
store: Arc::new(Mutex::new(store)),
module: Arc::new(module),
}))
Ok(Arc::new(module))
}

/// Given a wasm, attempts to get the serialized module for it from the cache.
/// If the cache misses a new serialized module, will be built from the wasm.
pub fn get(
&mut self,
key: CacheKey,
wasm: &[u8],
) -> Result<Arc<ModuleWithStore>, wasmer::RuntimeError> {
/// If the cache misses, a new serialized module will be built from the wasm.
pub fn get(&mut self, key: CacheKey, wasm: &[u8]) -> Result<Arc<Module>, wasmer::RuntimeError> {
match self.cache.get(&key) {
Some(serialized_module) => {
let store = self.store();
let module = unsafe { Module::deserialize(&store, (**serialized_module).clone()) }
.map_err(|e| wasm_error!(WasmErrorInner::Compile(e.to_string())))?;
let module = unsafe {
Module::deserialize(&self.runtime_engine, (**serialized_module).clone())
}
.map_err(|e| wasm_error!(WasmErrorInner::Compile(e.to_string())))?;
self.touch(&key);
Ok(Arc::new(ModuleWithStore {
store: Arc::new(Mutex::new(store)),
module: Arc::new(module),
}))
Ok(Arc::new(module))
}
None => self.get_with_build_cache(key, wasm),
}
}
}
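
// Illustrative sketch, not part of this commit: with modules cached and stores
// left uncached, a host builds a fresh `Store` and `Instance` per call from a
// cached module. The engine passed in is expected to be the same runtime engine
// the module was deserialized with; "add_one" matches the test wasm below and
// the empty import object is an assumption for simplicity.
fn example_call(
    runtime_engine: &Engine,
    module: &Module,
) -> Result<Box<[wasmer::Value]>, Box<dyn std::error::Error>> {
    // Fresh, state-free store for this call only.
    let mut store = Store::new(runtime_engine.clone());
    let instance = Instance::new(&mut store, module, &wasmer::imports! {})?;
    let add_one = instance.exports.get_function("add_one")?;
    Ok(add_one.call(&mut store, &[wasmer::Value::I32(1)])?)
}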

/// Caches wasm instances. Reusing wasm instances allows maximum speed in function
/// calls but also introduces the possibility of memory corruption or other bad
/// state that is inappropriate to persist/reuse/access across calls. It is the
/// responsibility of the host to discard instances that are not eligible for reuse.
/// Caches deserialized wasm modules. Deserializing a module from the
/// serialized cache every time a callable instance is needed is slow.
/// Therefore modules are cached in memory after deserialization.
#[derive(Default, Debug)]
pub struct InstanceCache {
pub struct DeserializedModuleCache {
plru: MicroCache,
key_map: PlruKeyMap,
cache: BTreeMap<CacheKey, Arc<InstanceWithStore>>,
cache: BTreeMap<CacheKey, Arc<Module>>,
}

impl PlruCache for InstanceCache {
type Item = InstanceWithStore;
impl PlruCache for DeserializedModuleCache {
type Item = Module;

fn plru_mut(&mut self) -> &mut MicroCache {
&mut self.plru
Expand All @@ -320,3 +404,97 @@ impl PlruCache for InstanceCache {
&mut self.cache
}
}

#[derive(Debug)]
pub struct ModuleCache {
serialized_module_cache: Arc<RwLock<SerializedModuleCache>>,
deserialized_module_cache: Arc<RwLock<DeserializedModuleCache>>,
}

impl ModuleCache {
pub fn new(maybe_fs_dir: Option<PathBuf>) -> Self {
let serialized_module_cache = Arc::new(RwLock::new(
SerializedModuleCache::default_with_engine(make_compiler_engine, maybe_fs_dir),
));
let deserialized_module_cache = Arc::new(RwLock::new(DeserializedModuleCache::default()));
ModuleCache {
serialized_module_cache,
deserialized_module_cache,
}
}

pub fn get(&self, key: CacheKey, wasm: &[u8]) -> Result<Arc<Module>, wasmer::RuntimeError> {
// check deserialized module cache first for module
{
let mut deserialized_cache = self.deserialized_module_cache.write();
if let Some(module) = deserialized_cache.get_item(&key) {
return Ok(module);
}
}
// get module from serialized cache otherwise
// if cache does not contain module, it will be built from wasm bytes
// and then cached in serialized cache
let module;
{
let mut serialized_cache = self.serialized_module_cache.write();
module = serialized_cache.get(key, wasm)?;
}
// cache in deserialized module cache too
{
let mut deserialized_cache = self.deserialized_module_cache.write();
deserialized_cache.put_item(key, module.clone());
}
Ok(module)
}
}

#[cfg(test)]
pub mod tests {
use crate::module::{CacheKey, ModuleCache, PlruCache};

#[test]
fn cache_test() {
// simple example wasm taken from wasmer docs
// https://docs.rs/wasmer/latest/wasmer/struct.Module.html#example
let wasm: Vec<u8> = vec![
0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x06, 0x01, 0x60, 0x01, 0x7f,
0x01, 0x7f, 0x03, 0x02, 0x01, 0x00, 0x07, 0x0b, 0x01, 0x07, 0x61, 0x64, 0x64, 0x5f,
0x6f, 0x6e, 0x65, 0x00, 0x00, 0x0a, 0x09, 0x01, 0x07, 0x00, 0x20, 0x00, 0x41, 0x01,
0x6a, 0x0b, 0x00, 0x1a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x01, 0x0a, 0x01, 0x00, 0x07,
0x61, 0x64, 0x64, 0x5f, 0x6f, 0x6e, 0x65, 0x02, 0x07, 0x01, 0x00, 0x01, 0x00, 0x02,
0x70, 0x30,
];
let module_cache = ModuleCache::new(None);
assert_eq!(
module_cache.serialized_module_cache.read().cache.is_empty(),
true
);
assert_eq!(
module_cache
.deserialized_module_cache
.read()
.cache
.is_empty(),
true
);

let key: CacheKey = [0u8; 32].into();
let module = module_cache.get(key.clone(), &wasm).unwrap();

// make sure module has been stored in serialized cache under key
{
let serialized_cached_module =
module_cache.serialized_module_cache.write().get_item(&key);
assert_eq!(matches!(serialized_cached_module, Some(_)), true);
}
// make sure module has been stored in deserialized cache under key
{
let deserialized_cached_module = module_cache
.deserialized_module_cache
.write()
.get_item(&key)
.unwrap();
assert_eq!(*deserialized_cached_module, *module);
}
}
}