Skip to content

Commit

Permalink
wasmparser: Add validator identifiers and a reset method (#1506)
Browse files Browse the repository at this point in the history
* wasmparser: Add validator IDs and a reset method

Lets users reuse validators (and their typing contexts) across different Wasm
modules. The IDs help users assert that they are pairing identifiers such as
`CoreTypeId` with the validator context that produced them.

* Remove from impl
  • Loading branch information
fitzgen authored Apr 18, 2024
1 parent 371aff5 commit ff2b40f
Show file tree
Hide file tree
Showing 2 changed files with 154 additions and 8 deletions.
125 changes: 122 additions & 3 deletions crates/wasmparser/src/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use crate::{
use bitflags::bitflags;
use std::mem;
use std::ops::Range;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;

/// Test whether the given buffer contains a valid WebAssembly module or component,
Expand Down Expand Up @@ -86,6 +87,23 @@ fn combine_type_sizes(a: u32, b: u32, offset: usize) -> Result<u32> {
}
}

/// An opaque token that uniquely identifies one `Validator`.
///
/// Record the `ValidatorId` of the [`Validator`][crate::Validator] that handed
/// you an identifier (for example a [`CoreTypeId`][crate::types::CoreTypeId]).
/// Later, before looking up the data behind that identifier, compare ids to
/// assert that you are still working with the same `Validator` instance.
///
/// Each default-constructed id takes the next value of a static counter, so
/// it differs from every id handed out before it (barring wrap-around of the
/// underlying `usize`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ValidatorId(usize);

impl Default for ValidatorId {
    /// Allocates a fresh id by bumping a static atomic counter.
    #[inline]
    fn default() -> Self {
        static NEXT_ID: AtomicUsize = AtomicUsize::new(0);
        let fresh = NEXT_ID.fetch_add(1, Ordering::AcqRel);
        Self(fresh)
    }
}

/// Validator for a WebAssembly binary module or component.
///
/// This structure encapsulates state necessary to validate a WebAssembly
Expand Down Expand Up @@ -113,6 +131,8 @@ fn combine_type_sizes(a: u32, b: u32, offset: usize) -> Result<u32> {
/// [core]: https://webassembly.github.io/spec/core/valid/index.html
#[derive(Default)]
pub struct Validator {
id: ValidatorId,

/// The current state of the validator.
state: State,

Expand Down Expand Up @@ -484,6 +504,100 @@ impl Validator {
&self.features
}

/// Reset this validator's state such that it is ready to validate a new
/// Wasm module or component.
///
/// This does *not* clear or reset the internal state keeping track of
/// validated (and deduplicated and canonicalized) types, allowing you to
/// use the same type identifiers (such as
/// [`CoreTypeId`][crate::types::CoreTypeId]) for the same types that are
/// defined multiple times across different modules and components.
///
/// The validator's identifier and its enabled features are left untouched
/// as well; only the validation state is returned to its default.
///
/// # Panics
///
/// Panics if this validator has not reached its end state, i.e. the
/// previous validation did not complete successfully. It also panics if the
/// validator still holds module state or component state.
///
/// # Examples
///
/// ```
/// fn foo() -> anyhow::Result<()> {
///     use wasmparser::Validator;
///
///     let mut validator = Validator::default();
///
///     // Two wasm modules, both of which define the same type, but at
///     // different indices in their respective types index spaces.
///     let wasm1 = wat::parse_str("
///         (module
///             (type $same_type (func (param i32) (result f64)))
///         )
///     ")?;
///     let wasm2 = wat::parse_str("
///         (module
///             (type $different_type (func))
///             (type $same_type (func (param i32) (result f64)))
///         )
///     ")?;
///
///     // Validate the first Wasm module and get the ID of its type.
///     let types = validator.validate_all(&wasm1)?;
///     let id1 = types.core_type_at(0);
///
///     // Reset the validator so we can parse the second wasm module inside
///     // this validator's same context.
///     validator.reset();
///
///     // Validate the second Wasm module and get the ID of its second type,
///     // which is the same type as the first Wasm module's only type.
///     let types = validator.validate_all(&wasm2)?;
///     let id2 = types.core_type_at(1);
///
///     // Because both modules were processed in the same `Validator`, they
///     // share the same types context and therefore the same type defined
///     // multiple times across different modules will be deduplicated and
///     // assigned the same identifier!
///     assert_eq!(id1, id2);
///     assert_eq!(types[id1.unwrap_sub()], types[id2.unwrap_sub()]);
///     # Ok(())
/// # }
/// # foo().unwrap()
/// ```
pub fn reset(&mut self) {
    // Destructure without `..` so that adding a field to `Validator` forces
    // a decision here about whether `reset` must touch it.
    let Validator {
        // Not changing the identifier; users should be able to observe that
        // they are using the same validation context, even after resetting.
        id: _,

        // Don't mess with `types`, we specifically want to reuse canonicalizations.
        types: _,

        // Also leave features as they are. While this is perhaps not
        // strictly necessary, it helps us avoid weird bugs where we have
        // different views of what is or is not a valid type at different
        // times, despite using the same `TypeList` and hash consing
        // context, and therefore there could be moments in time where we
        // have "invalid" types inside our current types list.
        features: _,

        state,
        module,
        components,
    } = self;

    assert!(
        matches!(state, State::End),
        "cannot reset a validator that did not successfully complete validation"
    );
    assert!(
        module.is_none(),
        "cannot reset a validator that still holds module state"
    );
    assert!(
        components.is_empty(),
        "cannot reset a validator that still holds component state"
    );

    *state = State::default();
}

/// Get this validator's unique identifier.
///
/// Allows you to assert that you are always working with the same
/// `Validator` instance, when you can't otherwise statically ensure that
/// property by e.g. storing a reference to the validator inside your
/// structure.
///
/// The returned value is a plain copy of the stored `ValidatorId`; this is
/// the same id the validator passes along when it builds `Types` and
/// `TypesRef` values.
// Marked `#[inline]` to match the `id` getters on `Types` and `TypesRef`.
#[inline]
pub fn id(&self) -> ValidatorId {
    self.id
}

/// Validates an entire in-memory module or component with this validator.
///
/// This function will internally create a [`Parser`] to parse the `bytes`
Expand Down Expand Up @@ -530,7 +644,7 @@ impl Validator {
pub fn types(&self, mut level: usize) -> Option<TypesRef> {
if let Some(module) = &self.module {
if level == 0 {
return Some(TypesRef::from_module(&self.types, &module.module));
return Some(TypesRef::from_module(self.id, &self.types, &module.module));
} else {
level -= 1;
}
Expand All @@ -539,7 +653,7 @@ impl Validator {
self.components
.iter()
.nth_back(level)
.map(|component| TypesRef::from_component(&self.types, component))
.map(|component| TypesRef::from_component(self.id, &self.types, component))
}

/// Convenience function to validate a single [`Payload`].
Expand Down Expand Up @@ -1372,6 +1486,7 @@ impl Validator {
}

Ok(Types::from_module(
self.id,
self.types.commit(),
state.module.arc().clone(),
))
Expand All @@ -1396,7 +1511,11 @@ impl Validator {
self.state = State::Component;
}

Ok(Types::from_component(self.types.commit(), component))
Ok(Types::from_component(
self.id,
self.types.commit(),
component,
))
}
}
}
Expand Down
37 changes: 32 additions & 5 deletions crates/wasmparser/src/validator/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use super::{
component::{ComponentState, ExternKind},
core::Module,
};
use crate::{validator::names::KebabString, HeapType};
use crate::{validator::names::KebabString, HeapType, ValidatorId};
use crate::{
BinaryReaderError, CompositeType, Export, ExternalKind, FuncType, GlobalType, Import, Matches,
MemoryType, PackedIndex, PrimitiveValType, RecGroup, RefType, Result, SubType, TableType,
Expand Down Expand Up @@ -1474,6 +1474,7 @@ enum TypesKind {
///
/// The type information is returned via the [`crate::Validator::end`] method.
pub struct Types {
/// Identifier of the validator this type information is associated with.
id: ValidatorId,
/// The owned type list supplied at construction; `as_ref` lends it out as
/// the `list` of a `TypesRef`.
list: TypeList,
/// Module- or component-specific data (`TypesKind::Module` or
/// `TypesKind::Component`).
kind: TypesKind,
}
Expand All @@ -1489,25 +1490,38 @@ enum TypesRefKind<'a> {
/// Retrieved via the [`crate::Validator::types`] method.
#[derive(Clone, Copy)]
pub struct TypesRef<'a> {
/// Identifier of the validator these types are associated with.
id: ValidatorId,
/// Borrowed type list.
list: &'a TypeList,
/// Borrowed module or component data (`TypesRefKind::Module` or
/// `TypesRefKind::Component`).
kind: TypesRefKind<'a>,
}

impl<'a> TypesRef<'a> {
pub(crate) fn from_module(types: &'a TypeList, module: &'a Module) -> Self {
pub(crate) fn from_module(id: ValidatorId, types: &'a TypeList, module: &'a Module) -> Self {
Self {
id,
list: types,
kind: TypesRefKind::Module(module),
}
}

pub(crate) fn from_component(types: &'a TypeList, component: &'a ComponentState) -> Self {
pub(crate) fn from_component(
id: ValidatorId,
types: &'a TypeList,
component: &'a ComponentState,
) -> Self {
Self {
id,
list: types,
kind: TypesRefKind::Component(component),
}
}

/// Get the id of the validator that these types are associated with.
#[inline]
pub fn id(&self) -> ValidatorId {
self.id
}

/// Gets a type based on its type id.
///
/// Returns `None` if the type id is unknown.
Expand Down Expand Up @@ -1930,23 +1944,36 @@ where
}

impl Types {
pub(crate) fn from_module(types: TypeList, module: Arc<Module>) -> Self {
pub(crate) fn from_module(id: ValidatorId, types: TypeList, module: Arc<Module>) -> Self {
Self {
id,
list: types,
kind: TypesKind::Module(module),
}
}

pub(crate) fn from_component(types: TypeList, component: ComponentState) -> Self {
pub(crate) fn from_component(
id: ValidatorId,
types: TypeList,
component: ComponentState,
) -> Self {
Self {
id,
list: types,
kind: TypesKind::Component(component),
}
}

/// Get the id of the validator that these types are associated with.
#[inline]
pub fn id(&self) -> ValidatorId {
self.id
}

/// Gets a reference to this validation type information.
pub fn as_ref(&self) -> TypesRef {
TypesRef {
id: self.id,
list: &self.list,
kind: match &self.kind {
TypesKind::Module(module) => TypesRefKind::Module(module),
Expand Down

0 comments on commit ff2b40f

Please sign in to comment.