diff --git a/examples/wasi.rs b/examples/wasi.rs index d76c146c69f..2f0db350bd8 100644 --- a/examples/wasi.rs +++ b/examples/wasi.rs @@ -43,7 +43,9 @@ fn main() -> Result<(), Box> { println!("Compiling module..."); // Let's compile the Wasm module. - let module = runtime.load_module_sync(&wasm_bytes[..])?; + let data = + wasmer_wasix::runtime::module_cache::HashedModuleData::new_sha256(wasm_bytes.clone()); + let module = runtime.load_hashed_module_sync(data)?; // Create a pipe for the module's stdout. let (stdout_tx, mut stdout_rx) = Pipe::channel(); diff --git a/lib/package/src/utils.rs b/lib/package/src/utils.rs index ce873da8fd0..133933c1891 100644 --- a/lib/package/src/utils.rs +++ b/lib/package/src/utils.rs @@ -62,6 +62,11 @@ pub fn from_disk(path: impl AsRef) -> Result bool { + is_tarball(std::io::Cursor::new(bytes)) || webc::detect(bytes).is_ok() +} + pub fn from_bytes(bytes: impl Into) -> Result { let bytes: Bytes = bytes.into(); diff --git a/lib/virtual-fs/src/lib.rs b/lib/virtual-fs/src/lib.rs index 9f3e9ce3277..2f0017fa98f 100644 --- a/lib/virtual-fs/src/lib.rs +++ b/lib/virtual-fs/src/lib.rs @@ -412,6 +412,16 @@ pub trait VirtualFile: }) } + /// Get the full contents of this file as an [`OwnedBuffer`]. + /// + /// **NOTE**: Only implement this if the file is already available in-memory + /// and can be cloned cheaply! + /// + /// Allows consumers to do zero-copy cloning of the underlying data. + fn as_owned_buffer(&self) -> Option { + None + } + /// Polls the file for when there is data to be read fn poll_read_ready(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll>; diff --git a/lib/virtual-fs/src/mem_fs/file.rs b/lib/virtual-fs/src/mem_fs/file.rs index 48a7dc1989c..cc12fd2c9fe 100644 --- a/lib/virtual-fs/src/mem_fs/file.rs +++ b/lib/virtual-fs/src/mem_fs/file.rs @@ -507,6 +507,17 @@ impl VirtualFile for FileHandle { self.cursor = cursor; Ok(()) } + + fn as_owned_buffer(&self) -> Option { + let fs = self.filesystem.inner.read().ok()?; + + let inode = fs.storage.get(self.inode)?; + match inode { + Node::ReadOnlyFile(f) => Some(f.file.buffer.clone()), + Node::CustomFile(f) => f.file.lock().ok()?.as_owned_buffer(), + _ => None, + } + } } #[cfg(test)] diff --git a/lib/virtual-fs/src/webc_volume_fs.rs b/lib/virtual-fs/src/webc_volume_fs.rs index 6bb9c47ebd1..b14309e262c 100644 --- a/lib/virtual-fs/src/webc_volume_fs.rs +++ b/lib/virtual-fs/src/webc_volume_fs.rs @@ -251,6 +251,10 @@ impl VirtualFile for File { ) -> Poll> { Poll::Ready(Err(std::io::ErrorKind::PermissionDenied.into())) } + + fn as_owned_buffer(&self) -> Option { + Some(self.content.get_ref().clone()) + } } impl AsyncRead for File { diff --git a/lib/wasix/src/bin_factory/exec.rs b/lib/wasix/src/bin_factory/exec.rs index da7306f24aa..deb293766df 100644 --- a/lib/wasix/src/bin_factory/exec.rs +++ b/lib/wasix/src/bin_factory/exec.rs @@ -6,6 +6,7 @@ use crate::{ TaskJoinHandle, }, runtime::{ + module_cache::HashedModuleData, task_manager::{ TaskWasm, TaskWasmRecycle, TaskWasmRecycleProperties, TaskWasmRunProperties, }, @@ -43,7 +44,7 @@ pub async fn spawn_exec( #[tracing::instrument(level = "trace", skip_all, fields(%name))] pub async fn spawn_exec_wasm( - wasm: &[u8], + wasm: HashedModuleData, name: &str, env: WasiEnv, runtime: &Arc, @@ -85,10 +86,10 @@ pub fn package_command_by_name<'a>( pub async fn spawn_load_module( name: &str, - wasm: &[u8], + wasm: HashedModuleData, runtime: &Arc, ) -> Result { - match runtime.load_module(wasm).await { + match runtime.load_hashed_module(wasm).await { Ok(module) => Ok(module), Err(err) => { tracing::error!( diff --git a/lib/wasix/src/bin_factory/mod.rs b/lib/wasix/src/bin_factory/mod.rs index ff2189764d1..c823799ff32 100644 --- a/lib/wasix/src/bin_factory/mod.rs +++ b/lib/wasix/src/bin_factory/mod.rs @@ -8,6 +8,7 @@ use std::{ }; use anyhow::Context; +use shared_buffer::OwnedBuffer; use virtual_fs::{AsyncReadExt, FileSystem}; use wasmer::FunctionEnvMut; use wasmer_package::utils::from_bytes; @@ -24,6 +25,7 @@ pub use self::{ }; use crate::{ os::{command::Commands, task::TaskJoinHandle}, + runtime::module_cache::HashedModuleData, Runtime, SpawnError, WasiEnv, }; @@ -84,7 +86,8 @@ impl BinFactory { // Execute match executable { Executable::Wasm(bytes) => { - spawn_exec_wasm(&bytes, name.as_str(), env, &self.runtime).await + let data = HashedModuleData::new_xxhash(bytes.clone()); + spawn_exec_wasm(data, name.as_str(), env, &self.runtime).await } Executable::BinaryPackage(pkg) => { // Get the command that is going to be executed @@ -170,7 +173,7 @@ impl BinFactory { } pub enum Executable { - Wasm(bytes::Bytes), + Wasm(OwnedBuffer), BinaryPackage(BinaryPackage), } @@ -185,18 +188,35 @@ async fn load_executable_from_filesystem( .open(path) .context("Unable to open the file")?; - let mut data = Vec::with_capacity(f.size() as usize); - f.read_to_end(&mut data).await.context("Read failed")?; + // Fast path if the file is fully available in memory. + // Prevents redundant copying of the file data. + if let Some(buf) = f.as_owned_buffer() { + if wasmer_package::utils::is_container(buf.as_slice()) { + let bytes = buf.clone().into_bytes(); + if let Ok(container) = from_bytes(bytes.clone()) { + let pkg = BinaryPackage::from_webc(&container, rt) + .await + .context("Unable to load the package")?; + + return Ok(Executable::BinaryPackage(pkg)); + } + } - let bytes: bytes::Bytes = data.into(); + Ok(Executable::Wasm(buf)) + } else { + let mut data = Vec::with_capacity(f.size() as usize); + f.read_to_end(&mut data).await.context("Read failed")?; - if let Ok(container) = from_bytes(bytes.clone()) { - let pkg = BinaryPackage::from_webc(&container, rt) - .await - .context("Unable to load the package")?; + let bytes: bytes::Bytes = data.into(); - Ok(Executable::BinaryPackage(pkg)) - } else { - Ok(Executable::Wasm(bytes)) + if let Ok(container) = from_bytes(bytes.clone()) { + let pkg = BinaryPackage::from_webc(&container, rt) + .await + .context("Unable to load the package")?; + + Ok(Executable::BinaryPackage(pkg)) + } else { + Ok(Executable::Wasm(OwnedBuffer::from_bytes(bytes))) + } } } diff --git a/lib/wasix/src/os/command/builtins/cmd_wasmer.rs b/lib/wasix/src/os/command/builtins/cmd_wasmer.rs index 11289e6a26f..cd69e9e0afd 100644 --- a/lib/wasix/src/os/command/builtins/cmd_wasmer.rs +++ b/lib/wasix/src/os/command/builtins/cmd_wasmer.rs @@ -3,9 +3,10 @@ use std::{any::Any, path::PathBuf, sync::Arc}; use crate::{ bin_factory::spawn_exec_wasm, os::task::{OwnedTaskStatus, TaskJoinHandle}, - runtime::task_manager::InlineWaker, + runtime::{module_cache::HashedModuleData, task_manager::InlineWaker}, SpawnError, }; +use shared_buffer::OwnedBuffer; use virtual_fs::{AsyncReadExt, FileSystem}; use wasmer::FunctionEnvMut; use wasmer_package::utils::from_bytes; @@ -50,6 +51,12 @@ impl CmdWasmer { } } +#[derive(Debug, Clone)] +enum Executable { + Wasm(OwnedBuffer), + BinaryPackage(BinaryPackage), +} + impl CmdWasmer { async fn run( &self, @@ -59,11 +66,6 @@ impl CmdWasmer { what: Option, mut args: Vec, ) -> Result { - pub enum Executable { - Wasm(bytes::Bytes), - BinaryPackage(BinaryPackage), - } - // If the first argument is a '--' then skip it if args.first().map(|a| a.as_str()) == Some("--") { args = args.into_iter().skip(1).collect(); @@ -102,7 +104,7 @@ impl CmdWasmer { Executable::BinaryPackage(pkg) } else { - Executable::Wasm(bytes) + Executable::Wasm(OwnedBuffer::from_bytes(bytes)) } } else if let Ok(pkg) = self.get_package(&what).await { Executable::BinaryPackage(pkg) @@ -136,7 +138,10 @@ impl CmdWasmer { // Now run the module spawn_exec(binary, name, env, &self.runtime).await } - Executable::Wasm(bytes) => spawn_exec_wasm(&bytes, name, env, &self.runtime).await, + Executable::Wasm(bytes) => { + let data = HashedModuleData::new_xxhash(bytes); + spawn_exec_wasm(data, name, env, &self.runtime).await + } } } else { let _ = unsafe { stderr_write(parent_ctx, HELP_RUN.as_bytes()) }.await; diff --git a/lib/wasix/src/runtime/mod.rs b/lib/wasix/src/runtime/mod.rs index 41e1562c44d..82e3e2f3f02 100644 --- a/lib/wasix/src/runtime/mod.rs +++ b/lib/wasix/src/runtime/mod.rs @@ -5,10 +5,9 @@ pub mod task_manager; pub use self::task_manager::{SpawnType, VirtualTaskManager}; use self::{module_cache::CacheError, task_manager::InlineWaker}; +use module_cache::HashedModuleData; use wasmer_config::package::SuggestedCompilerOptimizations; -use wasmer_types::{ - target::UserCompilerOptimizations as WasmerSuggestedCompilerOptimizations, ModuleHash, -}; +use wasmer_types::target::UserCompilerOptimizations as WasmerSuggestedCompilerOptimizations; use std::{ fmt, @@ -115,29 +114,30 @@ where /// Load the module for a command. /// + /// Will load the module from the cache if possible, otherwise will compile. + /// /// NOTE: This always be preferred over [`Self::load_module`] to avoid /// re-hashing the module! fn load_command_module( &self, cmd: &BinaryPackageCommand, ) -> BoxFuture<'_, Result> { - let hash = *cmd.hash(); - let wasm = cmd.atom(); let module_cache = self.module_cache(); + let data = HashedModuleData::from_command(cmd); let engine = match self.engine_with_suggested_opts(&cmd.suggested_compiler_optimizations) { Ok(engine) => engine, Err(error) => { return Box::pin(async move { Err(SpawnError::CompileError { - module_hash: hash, + module_hash: *data.hash(), error, }) }) } }; - let task = async move { load_module(&engine, &module_cache, &wasm, hash).await }; + let task = async move { load_module(&engine, &module_cache, &data).await }; Box::pin(task) } @@ -147,24 +147,49 @@ where InlineWaker::block_on(self.load_command_module(cmd)) } - /// Load a a Webassembly module, trying to use a pre-compiled version if possible. + /// Load a WebAssembly module from raw bytes. + /// + /// Will load the module from the cache if possible, otherwise will compile. + #[deprecated( + since = "0.601.0", + note = "Use `load_command_module` or `load_hashed_module` instead - this method can have high overhead" + )] fn load_module<'a>(&'a self, wasm: &'a [u8]) -> BoxFuture<'a, Result> { let engine = self.engine(); let module_cache = self.module_cache(); - let hash = ModuleHash::xxhash(wasm); - - let task = async move { load_module(&engine, &module_cache, wasm, hash).await }; - + let data = HashedModuleData::new_xxhash(wasm.to_vec()); + let task = async move { load_module(&engine, &module_cache, &data).await }; Box::pin(task) } - /// Load a a Webassembly module, trying to use a pre-compiled version if possible. - /// - /// Non-async version of [`Self::load_module`]. + /// Synchronous version of [`Self::load_module`]. + #[deprecated( + since = "0.601.0", + note = "Use `load_command_module` or `load_hashed_module` instead - this method can have high overhead" + )] fn load_module_sync(&self, wasm: &[u8]) -> Result { + #[allow(deprecated)] InlineWaker::block_on(self.load_module(wasm)) } + /// Load a WebAssembly module from pre-hashed data. + /// + /// Will load the module from the cache if possible, otherwise will compile. + fn load_hashed_module( + &self, + module: HashedModuleData, + ) -> BoxFuture<'_, Result> { + let engine = self.engine(); + let module_cache = self.module_cache(); + let task = async move { load_module(&engine, &module_cache, &module).await }; + Box::pin(task) + } + + /// Synchronous version of [`Self::load_hashed_module`]. + fn load_hashed_module_sync(&self, wasm: HashedModuleData) -> Result { + InlineWaker::block_on(self.load_hashed_module(wasm)) + } + /// Callback thats invokes whenever the instance is tainted, tainting can occur /// for multiple reasons however the most common is a panic within the process fn on_taint(&self, _reason: TaintReason) {} @@ -200,9 +225,9 @@ pub type DynRuntime = dyn Runtime + Send + Sync; pub async fn load_module( engine: &wasmer::Engine, module_cache: &(dyn ModuleCache + Send + Sync), - wasm: &[u8], - wasm_hash: ModuleHash, + module: &HashedModuleData, ) -> Result { + let wasm_hash = *module.hash(); let result = module_cache.load(wasm_hash, engine).await; match result { @@ -217,11 +242,13 @@ pub async fn load_module( } } - let module = Module::new(&engine, wasm).map_err(|err| crate::SpawnError::CompileError { - module_hash: wasm_hash, - error: err, - })?; + let module = + Module::new(&engine, module.wasm()).map_err(|err| crate::SpawnError::CompileError { + module_hash: wasm_hash, + error: err, + })?; + // TODO: pass a [`HashedModule`] struct that is safe by construction. if let Err(e) = module_cache.save(wasm_hash, engine, &module).await { tracing::warn!( %wasm_hash, @@ -623,20 +650,44 @@ impl Runtime for OverriddenRuntime { if self.engine.is_some() || self.module_cache.is_some() { let engine = self.engine(); let module_cache = self.module_cache(); - let hash = ModuleHash::xxhash(wasm); - let task = async move { load_module(&engine, &module_cache, wasm, hash).await }; + let data = HashedModuleData::new_xxhash(wasm.to_vec()); + let task = async move { load_module(&engine, &module_cache, &data).await }; Box::pin(task) } else { + #[allow(deprecated)] self.inner.load_module(wasm) } } fn load_module_sync(&self, wasm: &[u8]) -> Result { + #[allow(deprecated)] if self.engine.is_some() || self.module_cache.is_some() { InlineWaker::block_on(self.load_module(wasm)) } else { self.inner.load_module_sync(wasm) } } + + fn load_hashed_module( + &self, + data: HashedModuleData, + ) -> BoxFuture<'_, Result> { + if self.engine.is_some() || self.module_cache.is_some() { + let engine = self.engine(); + let module_cache = self.module_cache(); + let task = async move { load_module(&engine, &module_cache, &data).await }; + Box::pin(task) + } else { + self.inner.load_hashed_module(data) + } + } + + fn load_hashed_module_sync(&self, wasm: HashedModuleData) -> Result { + if self.engine.is_some() || self.module_cache.is_some() { + InlineWaker::block_on(self.load_hashed_module(wasm)) + } else { + self.inner.load_hashed_module_sync(wasm) + } + } } diff --git a/lib/wasix/src/runtime/module_cache/hashed_module.rs b/lib/wasix/src/runtime/module_cache/hashed_module.rs new file mode 100644 index 00000000000..2d0b14370f9 --- /dev/null +++ b/lib/wasix/src/runtime/module_cache/hashed_module.rs @@ -0,0 +1,60 @@ +use shared_buffer::OwnedBuffer; +use wasmer_types::ModuleHash; + +use crate::bin_factory::BinaryPackageCommand; + +/// A wrapper around Webassembly code and its hash. +/// +/// Allows passing around WASM code and it's hash without the danger of +/// using a wrong hash. +/// +/// Safe by construction: can only be created from a [`BinaryCommand`], which +/// already has the hash embedded, or from bytes that will be hashed in the +/// constructor. +/// +/// Can be cloned cheaply. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct HashedModuleData { + hash: ModuleHash, + wasm: OwnedBuffer, +} + +impl HashedModuleData { + pub fn new_sha256(bytes: impl Into) -> Self { + let wasm = bytes.into(); + let hash = ModuleHash::sha256(&wasm); + Self { hash, wasm } + } + + /// Create new [`HashedModuleData`] from the given bytes, hashing the + /// the bytes into a [`ModuleHash`] with xxhash. + pub fn new_xxhash(bytes: impl Into) -> Self { + let wasm = bytes.into(); + let hash = ModuleHash::xxhash(&wasm); + Self { hash, wasm } + } + + /// Create new [`HashedModuleData`] from the given [`BinaryPackageCommand`]. + /// + /// This is very cheap, as the hash is already available in the command. + pub fn from_command(command: &BinaryPackageCommand) -> Self { + Self { + hash: *command.hash(), + wasm: command.atom(), + } + } + + /// Get the module hash. + pub fn hash(&self) -> &ModuleHash { + &self.hash + } + + /// Get the WASM code. + pub fn wasm(&self) -> &OwnedBuffer { + &self.wasm + } + + pub fn into_parts(self) -> (ModuleHash, OwnedBuffer) { + (self.hash, self.wasm) + } +} diff --git a/lib/wasix/src/runtime/module_cache/mod.rs b/lib/wasix/src/runtime/module_cache/mod.rs index d786be8c40d..ba1d5242fbd 100644 --- a/lib/wasix/src/runtime/module_cache/mod.rs +++ b/lib/wasix/src/runtime/module_cache/mod.rs @@ -31,6 +31,8 @@ //! caching strategies. For example, you could use the [`FallbackCache`] to //! chain a fast in-memory cache with a slower file-based cache as a fallback. +mod hashed_module; + mod fallback; #[cfg(feature = "sys-thread")] mod filesystem; @@ -40,6 +42,7 @@ mod types; pub use self::{ fallback::FallbackCache, + hashed_module::HashedModuleData, shared::SharedCache, thread_local::ThreadLocalCache, types::{CacheError, ModuleCache}, diff --git a/lib/wasix/src/state/linker.rs b/lib/wasix/src/state/linker.rs index 1b0e0d4ddd9..d43d26d15af 100644 --- a/lib/wasix/src/state/linker.rs +++ b/lib/wasix/src/state/linker.rs @@ -272,6 +272,7 @@ use std::{ use bus::Bus; use derive_more::Debug; +use shared_buffer::OwnedBuffer; use tracing::trace; use virtual_fs::{AsyncReadExt, FileSystem, FsError}; use virtual_mio::InlineWaker; @@ -284,8 +285,9 @@ use wasmer::{ use wasmer_wasix_types::wasix::WasiMemoryLayout; use crate::{ - fs::WasiFsRoot, import_object_for_all_wasi_versions, Runtime, SpawnError, WasiEnv, WasiError, - WasiFs, WasiFunctionEnv, WasiModuleTreeHandles, WasiProcess, WasiThreadId, + fs::WasiFsRoot, import_object_for_all_wasi_versions, runtime::module_cache::HashedModuleData, + Runtime, SpawnError, WasiEnv, WasiError, WasiFs, WasiFunctionEnv, WasiModuleTreeHandles, + WasiProcess, WasiThreadId, }; use super::{WasiModuleInstanceHandles, WasiState}; @@ -2284,7 +2286,7 @@ impl LinkerState { } // Locate and load the module bytes - let (module_bytes, paths) = match module_spec { + let (module_data, paths) = match module_spec { DlModuleSpec::FileSystem { module_spec, ld_library_path, @@ -2307,12 +2309,15 @@ impl LinkerState { return Ok(INVALID_MODULE_HANDLE); } - (Cow::Owned(bytes), Some((full_path, ld_library_path))) + ( + HashedModuleData::new_sha256(bytes), + Some((full_path, ld_library_path)), + ) } - DlModuleSpec::Memory { bytes, .. } => (Cow::Borrowed(bytes), None), + DlModuleSpec::Memory { bytes, .. } => (HashedModuleData::new_sha256(bytes), None), }; - let module = runtime.load_module_sync(module_bytes.as_ref())?; + let module = runtime.load_hashed_module_sync(module_data)?; let dylink_info = parse_dylink0_section(&module)?; @@ -3922,11 +3927,11 @@ async fn locate_module( runtime_path: &[impl AsRef], calling_module_path: Option>, fs: &WasiFs, -) -> Result<(PathBuf, Vec), LinkError> { +) -> Result<(PathBuf, OwnedBuffer), LinkError> { async fn try_load( fs: &WasiFsRoot, path: impl AsRef, - ) -> Result<(PathBuf, Vec), FsError> { + ) -> Result<(PathBuf, OwnedBuffer), FsError> { let mut file = match fs.new_open_options().read(true).open(path.as_ref()) { Ok(f) => f, // Fallback for cases where the module thinks it's running on unix, @@ -3938,8 +3943,14 @@ async fn locate_module( Err(e) => return Err(e), }; - let mut buf = Vec::new(); - file.read_to_end(&mut buf).await?; + let buf = if let Some(buf) = file.as_owned_buffer() { + buf + } else { + let mut buf = Vec::new(); + file.read_to_end(&mut buf).await?; + OwnedBuffer::from(buf) + }; + Ok((path.as_ref().to_owned(), buf)) }