diff --git a/Cargo.lock b/Cargo.lock index df9e018c81..4f514a897b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -266,9 +266,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.21" +version = "1.2.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8691782945451c1c383942c4874dbe63814f61cb57ef773cda2972682b7bb3c0" +checksum = "32db95edf998450acc7881c932f94cd9b05c87b4b2599e8bab064753da4acfd1" dependencies = [ "jobserver", "libc", @@ -708,7 +708,7 @@ dependencies = [ "wasm-smith", "wasmi 0.46.0", "wasmi_fuzz", - "wasmprinter 0.228.0", + "wasmprinter 0.229.0", ] [[package]] @@ -734,9 +734,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", "libc", @@ -897,7 +897,7 @@ version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" dependencies = [ - "getrandom 0.3.2", + "getrandom 0.3.3", "libc", ] @@ -1667,12 +1667,12 @@ dependencies = [ [[package]] name = "wasm-encoder" -version = "0.228.0" +version = "0.229.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05d30290541f2d4242a162bbda76b8f2d8b1ac59eab3568ed6f2327d52c9b2c4" +checksum = "38ba1d491ecacb085a2552025c10a675a6fddcbd03b1fc9b36c536010ce265d2" dependencies = [ "leb128fmt", - "wasmparser 0.228.0", + "wasmparser 0.229.0", ] [[package]] @@ -1687,14 +1687,14 @@ dependencies = [ [[package]] name = "wasm-smith" -version = "0.228.0" +version = "0.229.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8906f0848b81bd33103f0db54396c52db4c46518eb55bebf28eae45a442b47f1" +checksum = "b0227a2ef527946ab58f9eefcb232576d89126db8c96b266c04e4a934cf24c92" dependencies = [ "anyhow", "arbitrary", "flagset", - "wasm-encoder 0.228.0", + "wasm-encoder 0.229.0", ] [[package]] @@ -1723,7 +1723,7 @@ dependencies = [ "wasmi_collections", "wasmi_core 0.46.0", "wasmi_ir", - "wasmparser 0.228.0", + "wasmparser 0.229.0", "wat", ] @@ -1805,7 +1805,7 @@ dependencies = [ "wasm-smith", "wasmi 0.31.2", "wasmi 0.46.0", - "wasmprinter 0.228.0", + "wasmprinter 0.229.0", "wasmtime", ] @@ -1831,7 +1831,7 @@ version = "0.46.0" dependencies = [ "anyhow", "wasmi 0.46.0", - "wast 228.0.0", + "wast 229.0.0", ] [[package]] @@ -1849,9 +1849,9 @@ dependencies = [ [[package]] name = "wasmparser" -version = "0.228.0" +version = "0.229.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4abf1132c1fdf747d56bbc1bb52152400c70f336870f968b85e89ea422198ae3" +checksum = "0cc3b1f053f5d41aa55640a1fa9b6d1b8a9e4418d118ce308d20e24ff3575a8c" dependencies = [ "bitflags", "hashbrown", @@ -1890,13 +1890,13 @@ dependencies = [ [[package]] name = "wasmprinter" -version = "0.228.0" +version = "0.229.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0df64bd38c14db359d02ce2024c64eb161aa2618ccee5f3bc5acbbd65c9a875c" +checksum = "d25dac01892684a99b8fbfaf670eb6b56edea8a096438c75392daeb83156ae2e" dependencies = [ "anyhow", "termcolor", - "wasmparser 0.228.0", + "wasmparser 0.229.0", ] [[package]] @@ -2063,15 +2063,15 @@ dependencies = [ [[package]] name = "wast" -version = "228.0.0" +version = "229.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5aae124478cb51439f6587f074a3a5e835afd22751c59a87b2e2a882727c97" +checksum = "63fcaff613c12225696bb163f79ca38ffb40e9300eff0ff4b8aa8b2f7eadf0d9" dependencies = [ "bumpalo", "leb128fmt", "memchr", "unicode-width", - "wasm-encoder 0.228.0", + "wasm-encoder 0.229.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 63438bbec1..ef76eade5b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,11 +41,11 @@ wasmi_fuzz = { version = "0.46.0", path = "crates/fuzz" } wasmi_wast = { version = "0.46.0", path = "crates/wast" } # wasm-tools dependencies -wat = { version = "1.228.0", default-features = false } -wast = { version = "228.0.0", default-features = false } -wasmparser = { version = "0.228.0", default-features = false } -wasm-smith = "0.228.0" -wasmprinter = { version = "0.228.0", default-features = false } +wat = { version = "1.229.0", default-features = false } +wast = { version = "229.0.0", default-features = false } +wasmparser = { version = "0.229.0", default-features = false } +wasm-smith = "0.229.0" +wasmprinter = { version = "0.229.0", default-features = false } # Wasmtime dependencies wasi-common = { version = "31.0.0", default-features = false } diff --git a/crates/wasmi/src/engine/translator/driver.rs b/crates/wasmi/src/engine/translator/driver.rs index ff73909833..349983f022 100644 --- a/crates/wasmi/src/engine/translator/driver.rs +++ b/crates/wasmi/src/engine/translator/driver.rs @@ -84,7 +84,7 @@ where self.translator.update_pos(pos); reader.visit_operator(&mut self.translator)??; } - reader.ensure_end()?; + reader.finish()?; Ok(reader.original_position()) } } diff --git a/crates/wasmi/src/engine/translator/mod.rs b/crates/wasmi/src/engine/translator/mod.rs index 14ed0a39cb..846f8fb967 100644 --- a/crates/wasmi/src/engine/translator/mod.rs +++ b/crates/wasmi/src/engine/translator/mod.rs @@ -356,12 +356,7 @@ where self.pos = pos; } - fn finish( - mut self, - finalize: impl FnOnce(CompiledFuncEntity), - ) -> Result { - let pos = self.current_pos(); - self.validator.finish(pos)?; + fn finish(self, finalize: impl FnOnce(CompiledFuncEntity)) -> Result { let translation = self.translator.finish(finalize)?; let validation = self.validator.into_allocations(); let allocations = ReusableAllocations { diff --git a/crates/wasmi/src/module/init_expr.rs b/crates/wasmi/src/module/init_expr.rs index 4254530555..4d96f01051 100644 --- a/crates/wasmi/src/module/init_expr.rs +++ b/crates/wasmi/src/module/init_expr.rs @@ -311,7 +311,7 @@ impl ConstExpr { }; } reader - .ensure_end() + .finish() .expect("due to Wasm validation this is guaranteed to succeed"); let op = stack .pop() diff --git a/crates/wasmi/src/module/parser.rs b/crates/wasmi/src/module/parser.rs index efc48e5e8d..a5733164e7 100644 --- a/crates/wasmi/src/module/parser.rs +++ b/crates/wasmi/src/module/parser.rs @@ -16,6 +16,7 @@ use crate::{ Error, FuncType, MemoryType, + Module, TableType, }; use alloc::boxed::Box; @@ -38,9 +39,6 @@ use wasmparser::{ Validator, }; -#[cfg(doc)] -use crate::Module; - mod buffered; mod streaming; @@ -54,8 +52,6 @@ pub struct ModuleParser { parser: WasmParser, /// The number of compiled or processed functions. engine_funcs: u32, - /// Flag, `true` when `stream` is at the end. - eof: bool, } impl ModuleParser { @@ -68,10 +64,16 @@ impl ModuleParser { validator: None, parser, engine_funcs: 0, - eof: false, } } + /// Finish Wasm module parsing and returns the resulting [`Module`]. + fn finish(&mut self, offset: usize, builder: ModuleBuilder) -> Result { + self.process_end(offset)?; + let module = builder.finish(&self.engine); + Ok(module) + } + /// Processes the end of the Wasm binary. fn process_end(&mut self, offset: usize) -> Result<(), Error> { if let Some(validator) = &mut self.validator { @@ -455,7 +457,7 @@ impl ModuleParser { } } if let Some(validator) = &mut self.validator { - validator.code_section_start(count, &range)?; + validator.code_section_start(&range)?; } Ok(()) } diff --git a/crates/wasmi/src/module/parser/buffered.rs b/crates/wasmi/src/module/parser/buffered.rs index 8217ec3854..c06e4435dc 100644 --- a/crates/wasmi/src/module/parser/buffered.rs +++ b/crates/wasmi/src/module/parser/buffered.rs @@ -6,7 +6,7 @@ use super::{ ModuleParser, }; use crate::{Error, Module}; -use wasmparser::{Chunk, Payload, Validator}; +use wasmparser::{Chunk, DataSectionReader, Payload, Validator}; impl ModuleParser { /// Starts parsing and validating the Wasm bytecode stream. @@ -57,54 +57,11 @@ impl ModuleParser { /// /// If the Wasm bytecode stream fails to validate. unsafe fn parse_buffered_impl(mut self, mut buffer: &[u8]) -> Result { + let buffer = &mut buffer; let mut custom_sections = CustomSectionsBuilder::default(); - let header = Self::parse_buffered_header(&mut self, &mut buffer, &mut custom_sections)?; - let builder = Self::parse_buffered_code(&mut self, &mut buffer, header, custom_sections)?; - let module = Self::parse_buffered_data(&mut self, &mut buffer, builder)?; - Ok(module) - } - - /// Fetch next Wasm module payload and adust the `buffer`. - /// - /// # Errors - /// - /// If the parsed Wasm is malformed. - fn next_payload<'a>(&mut self, buffer: &mut &'a [u8]) -> Result<(usize, Payload<'a>), Error> { - match self.parser.parse(&buffer[..], true)? { - Chunk::Parsed { consumed, payload } => Ok((consumed, payload)), - Chunk::NeedMoreData(_hint) => { - // This is not possible since `eof` is always true. - unreachable!() - } - } - } - - /// Consumes the parts of the buffer that have been processed. - fn consume_buffer<'a>(consumed: usize, buffer: &mut &'a [u8]) -> &'a [u8] { - let (consumed, remaining) = buffer.split_at(consumed); - *buffer = remaining; - consumed - } - - /// Parse the Wasm module header. - /// - /// - The Wasm module header is the set of all sections that appear before - /// the Wasm code section. - /// - We separate parsing of the Wasm module header since the information of - /// the Wasm module header is required for translating the Wasm code section. - /// - /// # Errors - /// - /// If the Wasm bytecode stream fails to parse or validate. - fn parse_buffered_header( - &mut self, - buffer: &mut &[u8], - custom_sections: &mut CustomSectionsBuilder, - ) -> Result { let mut header = ModuleHeaderBuilder::new(&self.engine); loop { - let (consumed, payload) = self.next_payload(buffer)?; - match payload { + match self.next_payload(buffer)? { Payload::Version { num, encoding, @@ -124,19 +81,26 @@ impl ModuleParser { Payload::DataCountSection { count, range } => self.process_data_count(count, range), Payload::CodeSectionStart { count, range, size } => { self.process_code_start(count, range, size)?; - Self::consume_buffer(consumed, buffer); - break; + return self.parse_buffered_code(buffer, header.finish(), custom_sections); + } + Payload::DataSection(data_section) => { + return self.parse_buffered_data( + buffer, + data_section, + header.finish(), + custom_sections, + ); + } + Payload::End(offset) => { + return self + .finish(offset, ModuleBuilder::new(header.finish(), custom_sections)) } - Payload::DataSection(_) => break, - Payload::End(_) => break, Payload::CustomSection(reader) => { - self.process_custom_section(custom_sections, reader) + self.process_custom_section(&mut custom_sections, reader) } unexpected => self.process_invalid_payload(unexpected), }?; - Self::consume_buffer(consumed, buffer); } - Ok(header.finish()) } /// Parse the Wasm code section entries. @@ -152,27 +116,29 @@ impl ModuleParser { &mut self, buffer: &mut &[u8], header: ModuleHeader, - custom_sections: CustomSectionsBuilder, - ) -> Result { + mut custom_sections: CustomSectionsBuilder, + ) -> Result { loop { - let (consumed, payload) = self.next_payload(buffer)?; - match payload { + match self.next_payload(buffer)? { Payload::CodeSectionEntry(func_body) => { - // Note: Unfortunately the `wasmparser` crate is missing an API - // to return the byte slice for the respective code section - // entry payload. Please remove this work around as soon as - // such an API becomes available. - Self::consume_buffer(consumed, buffer); let bytes = func_body.as_bytes(); self.process_code_entry(func_body, bytes, &header)?; } - _ => break, + Payload::CustomSection(reader) => { + self.process_custom_section(&mut custom_sections, reader)?; + } + Payload::DataSection(data_section) => { + return self.parse_buffered_data(buffer, data_section, header, custom_sections); + } + Payload::End(offset) => { + return self.finish(offset, ModuleBuilder::new(header, custom_sections)) + } + unexpected => self.process_invalid_payload(unexpected)?, } } - Ok(ModuleBuilder::new(header, custom_sections)) } - /// Parse the Wasm data section and finalize parsing. + /// Parse post the Wasm data section and finalize parsing. /// /// We separate parsing of the Wasm data section since it is the only Wasm /// section that comes after the Wasm code section that we have to separate @@ -184,25 +150,35 @@ impl ModuleParser { fn parse_buffered_data( &mut self, buffer: &mut &[u8], - mut builder: ModuleBuilder, + data_section: DataSectionReader, + header: ModuleHeader, + custom_sections: CustomSectionsBuilder, ) -> Result { + let mut builder = ModuleBuilder::new(header, custom_sections); + self.process_data(data_section, &mut builder)?; loop { - let (consumed, payload) = self.next_payload(buffer)?; - match payload { - Payload::DataSection(section) => { - self.process_data(section, &mut builder)?; - } - Payload::End(offset) => { - self.process_end(offset)?; - break; - } + match self.next_payload(buffer)? { + Payload::End(offset) => return self.finish(offset, builder), Payload::CustomSection(reader) => { self.process_custom_section(&mut builder.custom_sections, reader)?; } invalid => self.process_invalid_payload(invalid)?, } - Self::consume_buffer(consumed, buffer); } - Ok(builder.finish(&self.engine)) + } + + /// Fetch next Wasm module payload and adust the `buffer`. + /// + /// # Errors + /// + /// If the parsed Wasm is malformed. + fn next_payload<'a>(&mut self, buffer: &mut &'a [u8]) -> Result, Error> { + let chunk = self.parser.parse(&buffer[..], true)?; + let Chunk::Parsed { consumed, payload } = chunk else { + // Unreachable since `wasmparser` promises to return `Parsed` if `eof` is `true`. + unreachable!() + }; + *buffer = &buffer[consumed..]; + Ok(payload) } } diff --git a/crates/wasmi/src/module/parser/streaming.rs b/crates/wasmi/src/module/parser/streaming.rs index e623f851e2..2551c75784 100644 --- a/crates/wasmi/src/module/parser/streaming.rs +++ b/crates/wasmi/src/module/parser/streaming.rs @@ -7,20 +7,26 @@ use super::{ }; use crate::{Error, Module, Read}; use alloc::vec::Vec; -use core::ops::{Deref, DerefMut}; use wasmparser::{Chunk, Payload, Validator}; /// A buffer for holding parsed payloads in bytes. #[derive(Debug, Default, Clone)] struct ParseBuffer { buffer: Vec, + eof: bool, } impl ParseBuffer { + /// Returns the underlying bytes of the [`ParseBuffer`]. + #[inline] + fn bytes(&self) -> &[u8] { + &self.buffer[..] + } + /// Drops the first `amount` bytes from the [`ParseBuffer`] as they have been consumed. #[inline] - fn consume(buffer: &mut Self, amount: usize) { - buffer.drain(..amount); + fn consume(&mut self, amount: usize) { + self.buffer.drain(..amount); } /// Pulls more bytes from the `stream` in order to produce Wasm payload. @@ -31,35 +37,25 @@ impl ParseBuffer { /// /// Uses `hint` to efficiently preallocate enough space for the next payload. #[inline] - fn pull_bytes(buffer: &mut Self, hint: u64, stream: &mut impl Read) -> Result { + fn pull_bytes(&mut self, hint: u64, stream: &mut impl Read) -> Result<(), Error> { // Use the hint to preallocate more space, then read // some more data into the buffer. // // Note that the buffer management here is not ideal, // but it's compact enough to fit in an example! - let len = buffer.len(); + let len = self.buffer.len(); let new_len = len + hint as usize; - buffer.resize(new_len, 0x0_u8); - let read_bytes = stream.read(&mut buffer[len..])?; - buffer.truncate(len + read_bytes); - let reached_end = read_bytes == 0; - Ok(reached_end) + self.buffer.resize(new_len, 0x0_u8); + let read_bytes = stream.read(&mut self.buffer[len..])?; + self.buffer.truncate(len + read_bytes); + self.eof = read_bytes == 0; + Ok(()) } -} - -impl Deref for ParseBuffer { - type Target = Vec; + /// Returns `true` if [`ParseBuffer`] reached the end of file. #[inline] - fn deref(&self) -> &Self::Target { - &self.buffer - } -} - -impl DerefMut for ParseBuffer { - #[inline] - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.buffer + fn eof(&self) -> bool { + self.eof } } @@ -112,22 +108,10 @@ impl ModuleParser { /// /// If the Wasm bytecode stream fails to validate. unsafe fn parse_streaming_impl(mut self, mut stream: impl Read) -> Result { - let mut custom_sections = CustomSectionsBuilder::default(); + let custom_sections = CustomSectionsBuilder::default(); let mut buffer = ParseBuffer::default(); - let header = Self::parse_streaming_header( - &mut self, - &mut stream, - &mut buffer, - &mut custom_sections, - )?; - let builder = Self::parse_streaming_code( - &mut self, - &mut stream, - &mut buffer, - header, - custom_sections, - )?; - let module = Self::parse_streaming_data(&mut self, &mut stream, &mut buffer, builder)?; + let module = + Self::parse_streaming_module(&mut self, &mut stream, &mut buffer, custom_sections)?; Ok(module) } @@ -141,20 +125,17 @@ impl ModuleParser { /// # Errors /// /// If the Wasm bytecode stream fails to parse or validate. - fn parse_streaming_header( + fn parse_streaming_module( &mut self, stream: &mut impl Read, buffer: &mut ParseBuffer, - custom_sections: &mut CustomSectionsBuilder, - ) -> Result { + mut custom_sections: CustomSectionsBuilder, + ) -> Result { let mut header = ModuleHeaderBuilder::new(&self.engine); loop { - match self.parser.parse(&buffer[..], self.eof)? { + match self.parser.parse(buffer.bytes(), buffer.eof())? { Chunk::NeedMoreData(hint) => { - self.eof = ParseBuffer::pull_bytes(buffer, hint, stream)?; - if self.eof { - break; - } + buffer.pull_bytes(hint, stream)?; } Chunk::Parsed { consumed, payload } => { match payload { @@ -191,22 +172,36 @@ impl ModuleParser { } Payload::CodeSectionStart { count, range, size } => { self.process_code_start(count, range, size)?; - ParseBuffer::consume(buffer, consumed); - break; + buffer.consume(consumed); + return self.parse_streaming_code( + stream, + buffer, + header.finish(), + custom_sections, + ); + } + Payload::DataSection(data_section) => { + let mut builder = ModuleBuilder::new(header.finish(), custom_sections); + self.process_data(data_section, &mut builder)?; + buffer.consume(consumed); + return self.parse_streaming_data(stream, buffer, builder); + } + Payload::End(offset) => { + buffer.consume(consumed); + return self.finish( + offset, + ModuleBuilder::new(header.finish(), custom_sections), + ); } - Payload::DataSection(_) => break, - Payload::End(_) => break, Payload::CustomSection(reader) => { - self.process_custom_section(custom_sections, reader) + self.process_custom_section(&mut custom_sections, reader) } unexpected => self.process_invalid_payload(unexpected), }?; - // Cut away the parts from the intermediate buffer that have already been parsed. - ParseBuffer::consume(buffer, consumed); + buffer.consume(consumed); } } } - Ok(header.finish()) } /// Parse the Wasm code section entries. @@ -223,12 +218,12 @@ impl ModuleParser { stream: &mut impl Read, buffer: &mut ParseBuffer, header: ModuleHeader, - custom_sections: CustomSectionsBuilder, - ) -> Result { + mut custom_sections: CustomSectionsBuilder, + ) -> Result { loop { - match self.parser.parse(&buffer[..], self.eof)? { + match self.parser.parse(buffer.bytes(), buffer.eof())? { Chunk::NeedMoreData(hint) => { - self.eof = ParseBuffer::pull_bytes(buffer, hint, stream)?; + buffer.pull_bytes(hint, stream)?; } Chunk::Parsed { consumed, payload } => { match payload { @@ -240,14 +235,26 @@ impl ModuleParser { let bytes = func_body.as_bytes(); self.process_code_entry(func_body, bytes, &header)?; } - _ => break, + Payload::CustomSection(reader) => { + self.process_custom_section(&mut custom_sections, reader)?; + } + Payload::DataSection(data_section) => { + let mut builder = ModuleBuilder::new(header, custom_sections); + self.process_data(data_section, &mut builder)?; + buffer.consume(consumed); + return self.parse_streaming_data(stream, buffer, builder); + } + Payload::End(offset) => { + buffer.consume(consumed); + return self + .finish(offset, ModuleBuilder::new(header, custom_sections)); + } + unexpected => self.process_invalid_payload(unexpected)?, } - // Cut away the parts from the intermediate buffer that have already been parsed. - ParseBuffer::consume(buffer, consumed); + buffer.consume(consumed); } } } - Ok(ModuleBuilder::new(header, custom_sections)) } /// Parse the Wasm data section and finalize parsing. @@ -266,30 +273,24 @@ impl ModuleParser { mut builder: ModuleBuilder, ) -> Result { loop { - match self.parser.parse(&buffer[..], self.eof)? { + match self.parser.parse(buffer.bytes(), buffer.eof())? { Chunk::NeedMoreData(hint) => { - self.eof = ParseBuffer::pull_bytes(buffer, hint, stream)?; + buffer.pull_bytes(hint, stream)?; } Chunk::Parsed { consumed, payload } => { match payload { - Payload::DataSection(section) => { - self.process_data(section, &mut builder)?; - } Payload::End(offset) => { - self.process_end(offset)?; - ParseBuffer::consume(buffer, consumed); - break; + buffer.consume(consumed); + return self.finish(offset, builder); } Payload::CustomSection(reader) => { self.process_custom_section(&mut builder.custom_sections, reader)? } invalid => self.process_invalid_payload(invalid)?, } - // Cut away the parts from the intermediate buffer that have already been parsed. - ParseBuffer::consume(buffer, consumed); + buffer.consume(consumed); } } } - Ok(builder.finish(&self.engine)) } }