diff --git a/crates/rsonpath-benchmarks/src/implementations/rsonpath.rs b/crates/rsonpath-benchmarks/src/implementations/rsonpath.rs index 7cce473d..431a7057 100644 --- a/crates/rsonpath-benchmarks/src/implementations/rsonpath.rs +++ b/crates/rsonpath-benchmarks/src/implementations/rsonpath.rs @@ -1,15 +1,10 @@ use crate::framework::implementation::Implementation; -use ouroboros::self_referencing; +use rsonpath::{engine::Compiler, input::MmapInput}; use rsonpath::{ - engine::main::MainEngine, + engine::{main::MainEngine, Engine}, input::OwnedBytes, result::{Match, Sink}, }; -use rsonpath::{ - engine::{Compiler, Engine}, - input::MmapInput, -}; -use rsonpath_syntax::JsonPathQuery; use std::{convert::Infallible, fmt::Display, fs, io}; use thiserror::Error; @@ -18,16 +13,8 @@ pub struct RsonpathCount {} pub struct RsonpathMmap {} pub struct RsonpathMmapCount {} -#[self_referencing()] -pub struct RsonpathQuery { - query: JsonPathQuery, - #[borrows(query)] - #[not_covariant] - engine: MainEngine<'this>, -} - impl Implementation for Rsonpath { - type Query = RsonpathQuery; + type Query = MainEngine; type File = OwnedBytes>; @@ -52,25 +39,20 @@ impl Implementation for Rsonpath { fn compile_query(&self, query: &str) -> Result { let query = rsonpath_syntax::parse(query).unwrap(); + let engine = MainEngine::compile_query(&query).map_err(RsonpathError::CompilerError)?; - let rsonpath = RsonpathQuery::try_new(query, |query| { - MainEngine::compile_query(query).map_err(RsonpathError::CompilerError) - })?; - - Ok(rsonpath) + Ok(engine) } fn run(&self, query: &Self::Query, file: &Self::File) -> Result, Self::Error> { - query - .with_engine(|engine| engine.matches(file, &mut VoidSink)) - .map_err(RsonpathError::EngineError)?; + query.matches(file, &mut VoidSink).map_err(RsonpathError::EngineError)?; Ok("[not collected]") } } impl Implementation for RsonpathCount { - type Query = RsonpathQuery; + type Query = MainEngine; type File = OwnedBytes>; @@ -95,25 +77,20 @@ impl Implementation for RsonpathCount { fn compile_query(&self, query: &str) -> Result { let query = rsonpath_syntax::parse(query).unwrap(); + let engine = MainEngine::compile_query(&query).map_err(RsonpathError::CompilerError)?; - let rsonpath = RsonpathQuery::try_new(query, |query| { - MainEngine::compile_query(query).map_err(RsonpathError::CompilerError) - })?; - - Ok(rsonpath) + Ok(engine) } fn run(&self, query: &Self::Query, file: &Self::File) -> Result, Self::Error> { - query - .with_engine(|engine| engine.count(file)) - .map_err(RsonpathError::EngineError)?; + query.count(file).map_err(RsonpathError::EngineError)?; Ok("[not collected]") } } impl Implementation for RsonpathMmap { - type Query = RsonpathQuery; + type Query = MainEngine; type File = MmapInput; @@ -138,25 +115,20 @@ impl Implementation for RsonpathMmap { fn compile_query(&self, query: &str) -> Result { let query = rsonpath_syntax::parse(query).unwrap(); + let engine = MainEngine::compile_query(&query).map_err(RsonpathError::CompilerError)?; - let rsonpath = RsonpathQuery::try_new(query, |query| { - MainEngine::compile_query(query).map_err(RsonpathError::CompilerError) - })?; - - Ok(rsonpath) + Ok(engine) } fn run(&self, query: &Self::Query, file: &Self::File) -> Result, Self::Error> { - query - .with_engine(|engine| engine.matches(file, &mut VoidSink)) - .map_err(RsonpathError::EngineError)?; + query.matches(file, &mut VoidSink).map_err(RsonpathError::EngineError)?; Ok("[not collected]") } } impl Implementation for RsonpathMmapCount { - type Query = RsonpathQuery; + type Query = MainEngine; type File = MmapInput; @@ -181,18 +153,13 @@ impl Implementation for RsonpathMmapCount { fn compile_query(&self, query: &str) -> Result { let query = rsonpath_syntax::parse(query).unwrap(); + let engine = MainEngine::compile_query(&query).map_err(RsonpathError::CompilerError)?; - let rsonpath = RsonpathQuery::try_new(query, |query| { - MainEngine::compile_query(query).map_err(RsonpathError::CompilerError) - })?; - - Ok(rsonpath) + Ok(engine) } fn run(&self, query: &Self::Query, file: &Self::File) -> Result, Self::Error> { - query - .with_engine(|engine| engine.count(file)) - .map_err(RsonpathError::EngineError)?; + query.count(file).map_err(RsonpathError::EngineError)?; Ok("[not collected]") } diff --git a/crates/rsonpath-lib/src/automaton.rs b/crates/rsonpath-lib/src/automaton.rs index f0e4503b..f5634cd5 100644 --- a/crates/rsonpath-lib/src/automaton.rs +++ b/crates/rsonpath-lib/src/automaton.rs @@ -8,20 +8,20 @@ mod state; pub use state::{State, StateAttributes}; -use crate::{automaton::error::CompilerError, debug}; +use crate::{automaton::error::CompilerError, debug, string_pattern::StringPattern}; use nfa::NondeterministicAutomaton; -use rsonpath_syntax::{num::JsonUInt, str::JsonString, JsonPathQuery}; +use rsonpath_syntax::{num::JsonUInt, JsonPathQuery}; use smallvec::SmallVec; -use std::{fmt::Display, ops::Index}; +use std::{fmt::Display, ops::Index, rc::Rc}; /// A minimal, deterministic automaton representing a JSONPath query. #[derive(Debug, PartialEq, Eq)] -pub struct Automaton<'q> { - states: Vec>, +pub struct Automaton { + states: Vec, } -/// Transition when a JSON member name matches a [`JsonString`]i. -pub type MemberTransition<'q> = (&'q JsonString, State); +/// Transition when a JSON member name matches a [`StringPattern`]. +pub type MemberTransition = (Rc, State); /// Transition on elements of an array with indices specified by either a single index /// or a simple slice expression. @@ -45,9 +45,9 @@ pub(super) enum ArrayTransitionLabel { /// Contains transitions triggered by matching member names or array indices, and a fallback transition /// triggered when none of the labelled transitions match. #[derive(Debug)] -pub struct StateTable<'q> { +pub struct StateTable { attributes: StateAttributes, - member_transitions: SmallVec<[MemberTransition<'q>; 2]>, + member_transitions: SmallVec<[MemberTransition; 2]>, array_transitions: SmallVec<[ArrayTransition; 2]>, fallback_state: State, } @@ -59,7 +59,7 @@ pub(crate) struct SimpleSlice { step: JsonUInt, } -impl Default for StateTable<'_> { +impl Default for StateTable { #[inline] fn default() -> Self { Self { @@ -71,7 +71,7 @@ impl Default for StateTable<'_> { } } -impl PartialEq for StateTable<'_> { +impl PartialEq for StateTable { #[inline] fn eq(&self, other: &Self) -> bool { return self.fallback_state == other.fallback_state @@ -88,10 +88,10 @@ impl PartialEq for StateTable<'_> { } } -impl Eq for StateTable<'_> {} +impl Eq for StateTable {} -impl<'q> Index for Automaton<'q> { - type Output = StateTable<'q>; +impl Index for Automaton { + type Output = StateTable; #[inline(always)] fn index(&self, index: State) -> &Self::Output { @@ -149,7 +149,7 @@ impl From for ArrayTransitionLabel { } } -impl<'q> Automaton<'q> { +impl Automaton { /// Convert a [`JsonPathQuery`] into a minimal deterministic automaton. /// /// # Errors @@ -158,10 +158,10 @@ impl<'q> Automaton<'q> { /// - [`CompilerError::NotSupported`] raised if the query contains elements /// not yet supported by the compiler. #[inline] - pub fn new(query: &'q JsonPathQuery) -> Result { + pub fn new(query: &JsonPathQuery) -> Result { let nfa = NondeterministicAutomaton::new(query)?; debug!("NFA: {}", nfa); - Automaton::minimize(nfa) + Self::minimize(nfa) } /// Returns whether this automaton represents the select-root JSONPath query ('$'). @@ -389,12 +389,12 @@ impl<'q> Automaton<'q> { self[state].attributes.is_unitary() } - fn minimize(nfa: NondeterministicAutomaton<'q>) -> Result { + fn minimize(nfa: NondeterministicAutomaton) -> Result { minimizer::minimize(nfa) } } -impl<'q> StateTable<'q> { +impl StateTable { /// Returns the state to which a fallback transition leads. /// /// A fallback transition is the catch-all transition triggered @@ -421,7 +421,7 @@ impl<'q> StateTable<'q> { /// to the contained [`State`]. #[must_use] #[inline(always)] - pub fn member_transitions(&self) -> &[MemberTransition<'q>] { + pub fn member_transitions(&self) -> &[MemberTransition] { &self.member_transitions } } @@ -442,7 +442,7 @@ impl Display for ArrayTransitionLabel { } } -impl Display for Automaton<'_> { +impl Display for Automaton { #[inline] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { writeln!(f, "digraph {{")?; @@ -503,7 +503,12 @@ impl Display for Automaton<'_> { } } for (label, state) in &transitions.member_transitions { - writeln!(f, " {i} -> {} [label=\"{}\"]", state.0, label.unquoted())? + writeln!( + f, + " {i} -> {} [label=\"{}\"]", + state.0, + std::str::from_utf8(label.unquoted()).expect("labels to be valid utf8") + )? } writeln!(f, " {i} -> {} [label=\"*\"]", transitions.fallback_state.0)?; } diff --git a/crates/rsonpath-lib/src/automaton/minimizer.rs b/crates/rsonpath-lib/src/automaton/minimizer.rs index 77861efe..35d7e5dd 100644 --- a/crates/rsonpath-lib/src/automaton/minimizer.rs +++ b/crates/rsonpath-lib/src/automaton/minimizer.rs @@ -1,5 +1,7 @@ //! Determinization and minimization of an NFA into the final DFA used by the engines. +use std::rc::Rc; + // NOTE: Some comments in this module are outdated, because the minimizer doesn't // actually produce minimal automata as of now - see #91. use super::{ @@ -10,8 +12,7 @@ use super::{ state::StateAttributesBuilder, Automaton, NondeterministicAutomaton, State as DfaStateId, StateAttributes, StateTable, }; -use crate::{automaton::ArrayTransition, debug}; -use rsonpath_syntax::str::JsonString; +use crate::{automaton::ArrayTransition, debug, string_pattern::StringPattern}; use smallvec::{smallvec, SmallVec}; use vector_map::VecMap; @@ -31,9 +32,9 @@ pub(super) fn minimize(nfa: NondeterministicAutomaton) -> Result { +pub(super) struct Minimizer { /// The NFA being minimized. - nfa: NondeterministicAutomaton<'q>, + nfa: NondeterministicAutomaton, /// All superstates created thus far mapping to their index in the DFA being constructed. superstates: VecMap, /// Map from superstates to the furthest reachable checkpoint on a path leading to that superstate. @@ -41,15 +42,15 @@ pub(super) struct Minimizer<'q> { /// Superstates that have not been processed and expanded yet. active_superstates: SmallVec<[SmallSet256; 2]>, /// All superstates created thus far, in order matching the `superstates` map. - dfa_states: Vec>, + dfa_states: Vec, /// Set of activated DFA states that are accepting. accepting: SmallSet256, } #[derive(Debug)] -struct SuperstateTransitionTable<'q> { +struct SuperstateTransitionTable { array: ArrayTransitionSet, - member: VecMap<&'q JsonString, SmallSet256>, + member: VecMap, SmallSet256>, wildcard: SmallSet256, } @@ -78,10 +79,10 @@ struct SuperstateTransitionTable<'q> { * Superstate number 0 is specifically designated as the rejecting state, * which is used when there is no available checkpoint to return to. **/ -impl<'q> Minimizer<'q> { +impl Minimizer { /// Main loop of the algorithm. Initialize rejecting and initial states /// and perform expansion until we run out of active states. - fn run(mut self) -> Result, CompilerError> { + fn run(mut self) -> Result { // Rejecting state has no outgoing transitions except for a self-loop. self.dfa_states.push(StateTable { array_transitions: smallvec![], @@ -176,7 +177,7 @@ impl<'q> Minimizer<'q> { &self, id: DfaStateId, array_transitions: &[ArrayTransition], - member_transitions: &[(&JsonString, DfaStateId)], + member_transitions: &[(Rc, DfaStateId)], fallback: DfaStateId, ) -> StateAttributes { let mut attrs = StateAttributesBuilder::new(); @@ -261,7 +262,7 @@ impl<'q> Minimizer<'q> { &self, current_superstate: SmallSet256, current_checkpoint: Option, - ) -> Result, CompilerError> { + ) -> Result { let mut wildcard_targets = current_superstate .iter() .map(NfaStateId) @@ -285,7 +286,7 @@ impl<'q> Minimizer<'q> { }; for nfa_state in current_superstate.iter().map(NfaStateId) { - match self.nfa[nfa_state] { + match &self.nfa[nfa_state] { // Direct states simply have a single transition to the next state in the NFA. // Recursive transitions also have a self-loop, but that is handled by the // checkpoints mechanism - here we only handle the forward transition. @@ -293,17 +294,17 @@ impl<'q> Minimizer<'q> { | NfaState::Recursive(nfa::Transition::Member(label)) => { debug!( "Considering member transition {nfa_state} --{}-> {}", - label.unquoted(), + std::str::from_utf8(label.unquoted()).unwrap_or("[invalid utf8]"), nfa_state.next()?, ); // Add the target NFA state to the target superstate, or create a singleton // set if this is the first transition via this label encountered in the loop. - if let Some(target) = transitions.member.get_mut(&label) { + if let Some(target) = transitions.member.get_mut(label) { target.insert(nfa_state.next()?.0); } else { let mut new_set = transitions.wildcard; new_set.insert(nfa_state.next()?.0); - transitions.member.insert(label, new_set); + transitions.member.insert(label.clone(), new_set); } } NfaState::Direct(nfa::Transition::Array(label)) @@ -336,7 +337,7 @@ impl<'q> Minimizer<'q> { ); let mut new_set = transitions.wildcard; new_set.insert(nfa_state.next()?.0); - transitions.array.add_transition(label, new_set); + transitions.array.add_transition(*label, new_set); } NfaState::Direct(nfa::Transition::Wildcard) | NfaState::Recursive(nfa::Transition::Wildcard) @@ -405,6 +406,7 @@ mod tests { use super::super::*; use super::*; use pretty_assertions::assert_eq; + use rsonpath_syntax::str::JsonString; use smallvec::smallvec; #[test] @@ -552,16 +554,16 @@ mod tests { #[test] fn interstitial_descendant_wildcard() { // Query = $..a.b..*.a..b - let label_a = JsonString::new("a"); - let label_b = JsonString::new("b"); + let label_a = Rc::new(StringPattern::new(&JsonString::new("a"))); + let label_b = Rc::new(StringPattern::new(&JsonString::new("b"))); let nfa = NondeterministicAutomaton { ordered_states: vec![ - NfaState::Recursive(nfa::Transition::Member(&label_a)), - NfaState::Direct(nfa::Transition::Member(&label_b)), + NfaState::Recursive(nfa::Transition::Member(label_a.clone())), + NfaState::Direct(nfa::Transition::Member(label_b.clone())), NfaState::Recursive(nfa::Transition::Wildcard), - NfaState::Direct(nfa::Transition::Member(&label_a)), - NfaState::Recursive(nfa::Transition::Member(&label_b)), + NfaState::Direct(nfa::Transition::Member(label_a.clone())), + NfaState::Recursive(nfa::Transition::Member(label_b.clone())), NfaState::Accepting, ], }; @@ -578,13 +580,13 @@ mod tests { }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(2))], + member_transitions: smallvec![(label_a.clone(), State(2))], fallback_state: State(1), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(2)), (&label_b, State(3))], + member_transitions: smallvec![(label_a.clone(), State(2)), (label_b.clone(), State(3))], fallback_state: State(1), attributes: StateAttributes::EMPTY, }, @@ -596,19 +598,19 @@ mod tests { }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(5))], + member_transitions: smallvec![(label_a, State(5))], fallback_state: State(4), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_b, State(6))], + member_transitions: smallvec![(label_b.clone(), State(6))], fallback_state: State(5), attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_b, State(6))], + member_transitions: smallvec![(label_b, State(6))], fallback_state: State(5), attributes: StateAttributes::ACCEPTING | StateAttributes::TRANSITIONS_TO_ACCEPTING, }, @@ -621,16 +623,16 @@ mod tests { #[test] fn interstitial_nondescendant_wildcard() { // Query = $..a.b.*.a..b - let label_a = JsonString::new("a"); - let label_b = JsonString::new("b"); + let label_a = Rc::new(StringPattern::new(&JsonString::new("a"))); + let label_b = Rc::new(StringPattern::new(&JsonString::new("b"))); let nfa = NondeterministicAutomaton { ordered_states: vec![ - NfaState::Recursive(nfa::Transition::Member(&label_a)), - NfaState::Direct(nfa::Transition::Member(&label_b)), + NfaState::Recursive(nfa::Transition::Member(label_a.clone())), + NfaState::Direct(nfa::Transition::Member(label_b.clone())), NfaState::Direct(nfa::Transition::Wildcard), - NfaState::Direct(nfa::Transition::Member(&label_a)), - NfaState::Recursive(nfa::Transition::Member(&label_b)), + NfaState::Direct(nfa::Transition::Member(label_a.clone())), + NfaState::Recursive(nfa::Transition::Member(label_b.clone())), NfaState::Accepting, ], }; @@ -647,43 +649,43 @@ mod tests { }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(2))], + member_transitions: smallvec![(label_a.clone(), State(2))], fallback_state: State(1), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(2)), (&label_b, State(3))], + member_transitions: smallvec![(label_a.clone(), State(2)), (label_b.clone(), State(3))], fallback_state: State(1), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(4))], + member_transitions: smallvec![(label_a.clone(), State(4))], fallback_state: State(7), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(5)), (&label_b, State(3))], + member_transitions: smallvec![(label_a.clone(), State(5)), (label_b.clone(), State(3))], fallback_state: State(1), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_b, State(6))], + member_transitions: smallvec![(label_b.clone(), State(6))], fallback_state: State(5), attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_b, State(6))], + member_transitions: smallvec![(label_b, State(6))], fallback_state: State(5), attributes: StateAttributes::ACCEPTING | StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(5))], + member_transitions: smallvec![(label_a, State(5))], fallback_state: State(1), attributes: StateAttributes::EMPTY, }, @@ -696,11 +698,11 @@ mod tests { #[test] fn simple_multi_accepting() { // Query = $..a.* - let label = JsonString::new("a"); + let label = Rc::new(StringPattern::new(&JsonString::new("a"))); let nfa = NondeterministicAutomaton { ordered_states: vec![ - NfaState::Recursive(nfa::Transition::Member(&label)), + NfaState::Recursive(nfa::Transition::Member(label.clone())), NfaState::Direct(nfa::Transition::Wildcard), NfaState::Accepting, ], @@ -718,25 +720,25 @@ mod tests { }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label, State(2)),], + member_transitions: smallvec![(label.clone(), State(2)),], fallback_state: State(1), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label, State(3))], + member_transitions: smallvec![(label.clone(), State(3))], fallback_state: State(4), attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label, State(3))], + member_transitions: smallvec![(label.clone(), State(3))], fallback_state: State(4), attributes: StateAttributes::ACCEPTING | StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label, State(2))], + member_transitions: smallvec![(label, State(2))], fallback_state: State(1), attributes: StateAttributes::ACCEPTING, }, @@ -794,11 +796,11 @@ mod tests { #[test] fn chained_wildcard_children() { // Query = $.a.*.*.* - let label = JsonString::new("a"); + let label = Rc::new(StringPattern::new(&JsonString::new("a"))); let nfa = NondeterministicAutomaton { ordered_states: vec![ - NfaState::Direct(nfa::Transition::Member(&label)), + NfaState::Direct(nfa::Transition::Member(label.clone())), NfaState::Direct(nfa::Transition::Wildcard), NfaState::Direct(nfa::Transition::Wildcard), NfaState::Direct(nfa::Transition::Wildcard), @@ -818,7 +820,7 @@ mod tests { }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label, State(2))], + member_transitions: smallvec![(label, State(2))], fallback_state: State(0), attributes: StateAttributes::UNITARY, }, @@ -855,11 +857,11 @@ mod tests { #[test] fn chained_wildcard_children_after_descendant() { // Query = $..a.*.* - let label = JsonString::new("a"); + let label = Rc::new(StringPattern::new(&JsonString::new("a"))); let nfa = NondeterministicAutomaton { ordered_states: vec![ - NfaState::Recursive(nfa::Transition::Member(&label)), + NfaState::Recursive(nfa::Transition::Member(label.clone())), NfaState::Direct(nfa::Transition::Wildcard), NfaState::Direct(nfa::Transition::Wildcard), NfaState::Accepting, @@ -878,49 +880,49 @@ mod tests { }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label, State(2))], + member_transitions: smallvec![(label.clone(), State(2))], fallback_state: State(1), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label, State(3))], + member_transitions: smallvec![(label.clone(), State(3))], fallback_state: State(7), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label, State(4))], + member_transitions: smallvec![(label.clone(), State(4))], fallback_state: State(5), attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label, State(4))], + member_transitions: smallvec![(label.clone(), State(4))], fallback_state: State(5), attributes: StateAttributes::ACCEPTING | StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label, State(6))], + member_transitions: smallvec![(label.clone(), State(6))], fallback_state: State(8), attributes: StateAttributes::ACCEPTING | StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label, State(3))], + member_transitions: smallvec![(label.clone(), State(3))], fallback_state: State(7), attributes: StateAttributes::ACCEPTING, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label, State(6))], + member_transitions: smallvec![(label.clone(), State(6))], fallback_state: State(8), attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label, State(2))], + member_transitions: smallvec![(label, State(2))], fallback_state: State(1), attributes: StateAttributes::ACCEPTING, }, @@ -933,21 +935,21 @@ mod tests { #[test] fn child_and_descendant() { // Query = $.x..a.b.a.b.c..d - let label_a = JsonString::new("a"); - let label_b = JsonString::new("b"); - let label_c = JsonString::new("c"); - let label_d = JsonString::new("d"); - let label_x = JsonString::new("x"); + let label_a = Rc::new(StringPattern::new(&JsonString::new("a"))); + let label_b = Rc::new(StringPattern::new(&JsonString::new("b"))); + let label_c = Rc::new(StringPattern::new(&JsonString::new("c"))); + let label_d = Rc::new(StringPattern::new(&JsonString::new("d"))); + let label_x = Rc::new(StringPattern::new(&JsonString::new("x"))); let nfa = NondeterministicAutomaton { ordered_states: vec![ - NfaState::Direct(nfa::Transition::Member(&label_x)), - NfaState::Recursive(nfa::Transition::Member(&label_a)), - NfaState::Direct(nfa::Transition::Member(&label_b)), - NfaState::Direct(nfa::Transition::Member(&label_a)), - NfaState::Direct(nfa::Transition::Member(&label_b)), - NfaState::Direct(nfa::Transition::Member(&label_c)), - NfaState::Recursive(nfa::Transition::Member(&label_d)), + NfaState::Direct(nfa::Transition::Member(label_x.clone())), + NfaState::Recursive(nfa::Transition::Member(label_a.clone())), + NfaState::Direct(nfa::Transition::Member(label_b.clone())), + NfaState::Direct(nfa::Transition::Member(label_a.clone())), + NfaState::Direct(nfa::Transition::Member(label_b.clone())), + NfaState::Direct(nfa::Transition::Member(label_c.clone())), + NfaState::Recursive(nfa::Transition::Member(label_d.clone())), NfaState::Accepting, ], }; @@ -964,49 +966,49 @@ mod tests { }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_x, State(2))], + member_transitions: smallvec![(label_x, State(2))], fallback_state: State(0), attributes: StateAttributes::UNITARY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(3))], + member_transitions: smallvec![(label_a.clone(), State(3))], fallback_state: State(2), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(3)), (&label_b, State(4))], + member_transitions: smallvec![(label_a.clone(), State(3)), (label_b.clone(), State(4))], fallback_state: State(2), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(5))], + member_transitions: smallvec![(label_a.clone(), State(5))], fallback_state: State(2), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(3)), (&label_b, State(6))], + member_transitions: smallvec![(label_a.clone(), State(3)), (label_b, State(6))], fallback_state: State(2), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(5)), (&label_c, State(7))], + member_transitions: smallvec![(label_a, State(5)), (label_c, State(7))], fallback_state: State(2), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_d, State(8))], + member_transitions: smallvec![(label_d.clone(), State(8))], fallback_state: State(7), attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_d, State(8))], + member_transitions: smallvec![(label_d, State(8))], fallback_state: State(7), attributes: StateAttributes::ACCEPTING | StateAttributes::TRANSITIONS_TO_ACCEPTING, }, @@ -1019,17 +1021,17 @@ mod tests { #[test] fn child_descendant_and_child_wildcard() { // Query = $.x.*..a.*.b - let label_a = JsonString::new("a"); - let label_b = JsonString::new("b"); - let label_x = JsonString::new("x"); + let label_a = Rc::new(StringPattern::new(&JsonString::new("a"))); + let label_b = Rc::new(StringPattern::new(&JsonString::new("b"))); + let label_x = Rc::new(StringPattern::new(&JsonString::new("x"))); let nfa = NondeterministicAutomaton { ordered_states: vec![ - NfaState::Direct(nfa::Transition::Member(&label_x)), + NfaState::Direct(nfa::Transition::Member(label_x.clone())), NfaState::Direct(nfa::Transition::Wildcard), - NfaState::Recursive(nfa::Transition::Member(&label_a)), + NfaState::Recursive(nfa::Transition::Member(label_a.clone())), NfaState::Direct(nfa::Transition::Wildcard), - NfaState::Direct(nfa::Transition::Member(&label_b)), + NfaState::Direct(nfa::Transition::Member(label_b.clone())), NfaState::Accepting, ], }; @@ -1046,7 +1048,7 @@ mod tests { }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_x, State(2))], + member_transitions: smallvec![(label_x, State(2))], fallback_state: State(0), attributes: StateAttributes::UNITARY, }, @@ -1058,37 +1060,37 @@ mod tests { }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(4))], + member_transitions: smallvec![(label_a.clone(), State(4))], fallback_state: State(3), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(5))], + member_transitions: smallvec![(label_a.clone(), State(5))], fallback_state: State(8), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(5)), (&label_b, State(6))], + member_transitions: smallvec![(label_a.clone(), State(5)), (label_b.clone(), State(6))], fallback_state: State(8), attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(4)), (&label_b, State(7))], + member_transitions: smallvec![(label_a.clone(), State(4)), (label_b.clone(), State(7))], fallback_state: State(3), attributes: StateAttributes::ACCEPTING | StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(4))], + member_transitions: smallvec![(label_a.clone(), State(4))], fallback_state: State(3), attributes: StateAttributes::ACCEPTING, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(4)), (&label_b, State(7))], + member_transitions: smallvec![(label_a, State(4)), (label_b, State(7))], fallback_state: State(3), attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING, }, @@ -1101,17 +1103,17 @@ mod tests { #[test] fn all_name_and_wildcard_selectors() { // Query = $.a.b..c..d.*..* - let label_a = JsonString::new("a"); - let label_b = JsonString::new("b"); - let label_c = JsonString::new("c"); - let label_d = JsonString::new("d"); + let label_a = Rc::new(StringPattern::new(&JsonString::new("a"))); + let label_b = Rc::new(StringPattern::new(&JsonString::new("b"))); + let label_c = Rc::new(StringPattern::new(&JsonString::new("c"))); + let label_d = Rc::new(StringPattern::new(&JsonString::new("d"))); let nfa = NondeterministicAutomaton { ordered_states: vec![ - NfaState::Direct(nfa::Transition::Member(&label_a)), - NfaState::Direct(nfa::Transition::Member(&label_b)), - NfaState::Recursive(nfa::Transition::Member(&label_c)), - NfaState::Recursive(nfa::Transition::Member(&label_d)), + NfaState::Direct(nfa::Transition::Member(label_a.clone())), + NfaState::Direct(nfa::Transition::Member(label_b.clone())), + NfaState::Recursive(nfa::Transition::Member(label_c.clone())), + NfaState::Recursive(nfa::Transition::Member(label_d.clone())), NfaState::Direct(nfa::Transition::Wildcard), NfaState::Recursive(nfa::Transition::Wildcard), NfaState::Accepting, @@ -1127,31 +1129,31 @@ mod tests { }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_a, State(2)),], + member_transitions: smallvec![(label_a, State(2)),], fallback_state: State(0), attributes: StateAttributes::UNITARY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_b, State(3))], + member_transitions: smallvec![(label_b, State(3))], fallback_state: State(0), attributes: StateAttributes::UNITARY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_c, State(4))], + member_transitions: smallvec![(label_c, State(4))], fallback_state: State(3), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_d, State(5))], + member_transitions: smallvec![(label_d.clone(), State(5))], fallback_state: State(4), attributes: StateAttributes::EMPTY, }, StateTable { array_transitions: smallvec![], - member_transitions: smallvec![(&label_d, State(6))], + member_transitions: smallvec![(label_d, State(6))], fallback_state: State(6), attributes: StateAttributes::EMPTY, }, diff --git a/crates/rsonpath-lib/src/automaton/nfa.rs b/crates/rsonpath-lib/src/automaton/nfa.rs index 7c726b09..74df00ca 100644 --- a/crates/rsonpath-lib/src/automaton/nfa.rs +++ b/crates/rsonpath-lib/src/automaton/nfa.rs @@ -1,40 +1,40 @@ //! Definition of a nondeterministic automaton that can be directly //! obtained from a JsonPath query. This is then turned into //! a DFA with the minimizer. -use crate::{automaton::SimpleSlice, error::UnsupportedFeatureError}; +use crate::{automaton::SimpleSlice, error::UnsupportedFeatureError, string_pattern::StringPattern}; use super::{error::CompilerError, ArrayTransitionLabel}; use rsonpath_syntax::{str::JsonString, JsonPathQuery, Step}; -use std::{fmt::Display, ops::Index}; +use std::{collections::HashMap, fmt::Display, ops::Index, rc::Rc}; /// An NFA representing a query. It is always a directed path /// from an initial state to the unique accepting state at the end, /// where transitions are either self-loops or go forward to the immediate /// successor in the path. #[derive(Debug, PartialEq, Eq)] -pub(super) struct NondeterministicAutomaton<'q> { - pub(super) ordered_states: Vec>, +pub(super) struct NondeterministicAutomaton { + pub(super) ordered_states: Vec, } /// Types of states allowed in an NFA directly mapped from a [`JsonPathQuery`]. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub(super) enum NfaState<'q> { +#[derive(Clone, Debug, PartialEq, Eq)] +pub(super) enum NfaState { /// A state with a single forward [`Transition`] only. - Direct(Transition<'q>), + Direct(Transition), /// A state with a forward [`Transition`] and a wildcard self-loop. - Recursive(Transition<'q>), + Recursive(Transition), /// The final state in the NFA with no outgoing transitions. Accepting, } use NfaState::*; /// A transition in the NFA mapped from a [`JsonPathQuery`] selector. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub(super) enum Transition<'q> { +#[derive(Clone, Debug, PartialEq, Eq)] +pub(super) enum Transition { /// A transition matching array indices. Array(ArrayTransitionLabel), /// A transition matching a specific member. - Member(&'q JsonString), + Member(Rc), /// A transition matching anything. Wildcard, } @@ -58,7 +58,7 @@ impl NfaStateId { } } -impl<'q> NondeterministicAutomaton<'q> { +impl NondeterministicAutomaton { /// Translate a [`JsonPathQuery`] into an NFA. /// /// # Errors @@ -67,8 +67,11 @@ impl<'q> NondeterministicAutomaton<'q> { /// /// Returns a [`CompilerError::NotSupported`] if the query contains a construct /// not currently supported by rsonpath. - pub(super) fn new(query: &'q JsonPathQuery) -> Result { + pub(super) fn new<'q>(query: &'q JsonPathQuery) -> Result { use rsonpath_syntax::{Index, Selector}; + use std::collections::hash_map::Entry; + + let mut string_pattern_cache: HashMap<&'q JsonString, Rc> = HashMap::new(); let states_result: Result, CompilerError> = query .segments() @@ -80,7 +83,17 @@ impl<'q> NondeterministicAutomaton<'q> { Err(UnsupportedFeatureError::multiple_selectors().into()) } else { let transition = match selectors.first() { - Selector::Name(name) => Ok::<_, CompilerError>(Transition::Member(name)), + Selector::Name(name) => { + let pattern = match string_pattern_cache.entry(name) { + Entry::Occupied(pat) => pat.get().clone(), + Entry::Vacant(entry) => { + let pat = Rc::new(StringPattern::new(name)); + entry.insert(pat.clone()); + pat + } + }; + Ok::<_, CompilerError>(Transition::Member(pattern)) + } Selector::Wildcard => Ok(Transition::Wildcard), Selector::Index(Index::FromStart(index)) => Ok(Transition::Array((*index).into())), Selector::Index(Index::FromEnd(_)) => Err(UnsupportedFeatureError::indexing_from_end().into()), @@ -119,7 +132,7 @@ impl<'q> NondeterministicAutomaton<'q> { if let Err(err) = accepting_state { Err(CompilerError::QueryTooComplex(Some(err))) } else { - Ok(NondeterministicAutomaton { ordered_states: states }) + Ok(Self { ordered_states: states }) } } @@ -129,8 +142,8 @@ impl<'q> NondeterministicAutomaton<'q> { } } -impl<'q> Index for NondeterministicAutomaton<'q> { - type Output = NfaState<'q>; +impl Index for NondeterministicAutomaton { + type Output = NfaState; fn index(&self, index: NfaStateId) -> &Self::Output { &self.ordered_states[index.0 as usize] @@ -143,7 +156,7 @@ impl Display for NfaStateId { } } -impl Display for NondeterministicAutomaton<'_> { +impl Display for NondeterministicAutomaton { // This is the format for https://paperman.name/semigroup/ // for easy debugging of minimization. fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -152,7 +165,7 @@ impl Display for NondeterministicAutomaton<'_> { .iter() .filter_map(|s| match s { Direct(Transition::Member(label)) | Recursive(Transition::Member(label)) => { - Some(label.unquoted().to_string()) + Some(stringify_label(label).to_string()) } Direct(Transition::Array(label)) | Recursive(Transition::Array(label)) => Some(label.to_string()), _ => None, @@ -165,7 +178,7 @@ impl Display for NondeterministicAutomaton<'_> { writeln!(f, "s{i}.{} -> s{};", label, i + 1)?; } Direct(Transition::Member(label)) => { - writeln!(f, "s{i}.{} -> s{};", label.unquoted(), i + 1)?; + writeln!(f, "s{i}.{} -> s{};", stringify_label(label), i + 1)?; } Direct(Transition::Wildcard) => { for label in &all_labels { @@ -174,8 +187,8 @@ impl Display for NondeterministicAutomaton<'_> { writeln!(f, "s{i}.X -> s{};", i + 1)?; } Recursive(Transition::Member(label)) => { - writeln!(f, "s{i}.{} -> s{i}, s{};", label.unquoted(), i + 1)?; - for label in all_labels.iter().filter(|&l| l != label.unquoted()) { + writeln!(f, "s{i}.{} -> s{i}, s{};", stringify_label(label), i + 1)?; + for label in all_labels.iter().filter(|&l| l != stringify_label(label)) { writeln!(f, "s{i}.{} -> s{i};", label)?; } writeln!(f, "s{i}.X -> s{i};")?; @@ -196,14 +209,18 @@ impl Display for NondeterministicAutomaton<'_> { Accepting => (), } } - Ok(()) + return Ok(()); + + fn stringify_label(label: &StringPattern) -> &str { + std::str::from_utf8(label.unquoted()).expect("labels must be valid utf8") + } } } #[cfg(test)] mod tests { use super::*; - use rsonpath_syntax::builder::JsonPathQueryBuilder; + use rsonpath_syntax::{builder::JsonPathQueryBuilder, str::JsonString}; #[test] fn nfa_test() { @@ -225,10 +242,10 @@ mod tests { let expected_automaton = NondeterministicAutomaton { ordered_states: vec![ - NfaState::Direct(Transition::Member(&label_a)), - NfaState::Direct(Transition::Member(&label_b)), - NfaState::Recursive(Transition::Member(&label_c)), - NfaState::Recursive(Transition::Member(&label_d)), + NfaState::Direct(Transition::Member(Rc::new(StringPattern::new(&label_a)))), + NfaState::Direct(Transition::Member(Rc::new(StringPattern::new(&label_b)))), + NfaState::Recursive(Transition::Member(Rc::new(StringPattern::new(&label_c)))), + NfaState::Recursive(Transition::Member(Rc::new(StringPattern::new(&label_d)))), NfaState::Direct(Transition::Wildcard), NfaState::Direct(Transition::Wildcard), NfaState::Recursive(Transition::Wildcard), diff --git a/crates/rsonpath-lib/src/classification/memmem.rs b/crates/rsonpath-lib/src/classification/memmem.rs index 03430d3f..8b0a3370 100644 --- a/crates/rsonpath-lib/src/classification/memmem.rs +++ b/crates/rsonpath-lib/src/classification/memmem.rs @@ -3,13 +3,13 @@ use crate::{ input::{error::InputError, Input}, result::InputRecorder, + string_pattern::StringPattern, BLOCK_SIZE, }; -use rsonpath_syntax::str::JsonString; /// Classifier that can quickly find a member name in a byte stream. pub trait Memmem<'i, 'b, 'r, I: Input, const N: usize> { - /// Find a member key identified by a given [`JsonString`]. + /// Find a member key identified by a given [`StringPattern`]. /// /// - `first_block` – optional first block to search; if not provided, /// the search will start at the next block returned by the underlying [`Input`] iterator. @@ -22,7 +22,7 @@ pub trait Memmem<'i, 'b, 'r, I: Input, const N: usize> { &mut self, first_block: Option>, start_idx: usize, - label: &JsonString, + label: &StringPattern, ) -> Result)>, InputError>; } diff --git a/crates/rsonpath-lib/src/classification/memmem/avx2_32.rs b/crates/rsonpath-lib/src/classification/memmem/avx2_32.rs index be0e5992..c2b189fd 100644 --- a/crates/rsonpath-lib/src/classification/memmem/avx2_32.rs +++ b/crates/rsonpath-lib/src/classification/memmem/avx2_32.rs @@ -51,7 +51,7 @@ where #[inline(always)] unsafe fn find_empty( &mut self, - label: &JsonString, + label: &StringPattern, mut offset: usize, ) -> Result)>, InputError> { let classifier = vector_256::BlockClassifier256::new(b'"', b'"'); @@ -86,10 +86,10 @@ where #[inline(always)] unsafe fn find_letter( &mut self, - label: &JsonString, + label: &StringPattern, mut offset: usize, ) -> Result)>, InputError> { - let classifier = vector_256::BlockClassifier256::new(label.unquoted().as_bytes()[0], b'"'); + let classifier = vector_256::BlockClassifier256::new(label.unquoted()[0], b'"'); let mut previous_block: u32 = 0; while let Some(block) = self.iter.next().e()? { @@ -116,7 +116,7 @@ where #[inline(always)] unsafe fn find_label_avx2( &mut self, - label: &JsonString, + label: &StringPattern, mut offset: usize, ) -> Result)>, InputError> { if label.unquoted().is_empty() { @@ -125,8 +125,7 @@ where return self.find_letter(label, offset); } - let classifier = - vector_256::BlockClassifier256::new(label.unquoted().as_bytes()[0], label.unquoted().as_bytes()[1]); + let classifier = vector_256::BlockClassifier256::new(label.unquoted()[0], label.unquoted()[1]); let mut previous_block: u32 = 0; while let Some(block) = self.iter.next().e()? { @@ -162,7 +161,7 @@ where &mut self, first_block: Option>, start_idx: usize, - label: &JsonString, + label: &StringPattern, ) -> Result)>, InputError> { if let Some(b) = first_block { if let Some(res) = shared::find_label_in_first_block(self.input, b, start_idx, label)? { diff --git a/crates/rsonpath-lib/src/classification/memmem/avx2_64.rs b/crates/rsonpath-lib/src/classification/memmem/avx2_64.rs index 3e2227de..36937ad8 100644 --- a/crates/rsonpath-lib/src/classification/memmem/avx2_64.rs +++ b/crates/rsonpath-lib/src/classification/memmem/avx2_64.rs @@ -54,7 +54,7 @@ where #[inline(always)] unsafe fn find_empty( &mut self, - label: &JsonString, + label: &StringPattern, mut offset: usize, ) -> Result)>, InputError> { let classifier = vector_256::BlockClassifier256::new(b'"', b'"'); @@ -94,10 +94,10 @@ where #[inline(always)] unsafe fn find_letter( &mut self, - label: &JsonString, + label: &StringPattern, mut offset: usize, ) -> Result)>, InputError> { - let classifier = vector_256::BlockClassifier256::new(label.unquoted().as_bytes()[0], b'"'); + let classifier = vector_256::BlockClassifier256::new(label.unquoted()[0], b'"'); let mut previous_block: u64 = 0; while let Some(block) = self.iter.next().e()? { @@ -124,7 +124,7 @@ where #[inline(always)] unsafe fn find_label_avx2( &mut self, - label: &JsonString, + label: &StringPattern, mut offset: usize, ) -> Result)>, InputError> { if label.unquoted().is_empty() { @@ -133,8 +133,7 @@ where return self.find_letter(label, offset); } - let classifier = - vector_256::BlockClassifier256::new(label.unquoted().as_bytes()[0], label.unquoted().as_bytes()[1]); + let classifier = vector_256::BlockClassifier256::new(label.unquoted()[0], label.unquoted()[1]); let mut previous_block: u64 = 0; while let Some(block) = self.iter.next().e()? { @@ -170,7 +169,7 @@ where &mut self, first_block: Option>, start_idx: usize, - label: &JsonString, + label: &StringPattern, ) -> Result)>, InputError> { if let Some(b) = first_block { if let Some(res) = shared::find_label_in_first_block(self.input, b, start_idx, label)? { diff --git a/crates/rsonpath-lib/src/classification/memmem/nosimd.rs b/crates/rsonpath-lib/src/classification/memmem/nosimd.rs index 8d812ac8..19948437 100644 --- a/crates/rsonpath-lib/src/classification/memmem/nosimd.rs +++ b/crates/rsonpath-lib/src/classification/memmem/nosimd.rs @@ -42,14 +42,14 @@ where #[inline] fn find_label_sequential( &mut self, - label: &JsonString, + label: &StringPattern, mut offset: usize, ) -> Result)>, InputError> { let label_size = label.quoted().len(); let first_c = if label.unquoted().is_empty() { b'"' } else { - label.unquoted().as_bytes()[0] + label.unquoted()[0] }; while let Some(block) = self.iter.next().e()? { @@ -78,7 +78,7 @@ where &mut self, first_block: Option>, start_idx: usize, - label: &JsonString, + label: &StringPattern, ) -> Result)>, InputError> { if let Some(b) = first_block { if let Some(res) = shared::find_label_in_first_block(self.input, b, start_idx, label)? { diff --git a/crates/rsonpath-lib/src/classification/memmem/shared.rs b/crates/rsonpath-lib/src/classification/memmem/shared.rs index c077c5bb..adef4b2b 100644 --- a/crates/rsonpath-lib/src/classification/memmem/shared.rs +++ b/crates/rsonpath-lib/src/classification/memmem/shared.rs @@ -1,8 +1,10 @@ -use crate::input::{ - error::{InputError, InputErrorConvertible}, - Input, +use crate::{ + input::{ + error::{InputError, InputErrorConvertible}, + Input, + }, + string_pattern::StringPattern, }; -use rsonpath_syntax::str::JsonString; #[cfg(target_arch = "x86")] pub(super) mod mask_32; @@ -17,7 +19,7 @@ pub(crate) fn find_label_in_first_block<'i, 'r, I, const N: usize>( input: &I, first_block: I::Block<'i, N>, start_idx: usize, - label: &JsonString, + label: &StringPattern, ) -> Result)>, InputError> where I: Input, diff --git a/crates/rsonpath-lib/src/classification/memmem/shared/mask_32.rs b/crates/rsonpath-lib/src/classification/memmem/shared/mask_32.rs index 781c76ae..57a59599 100644 --- a/crates/rsonpath-lib/src/classification/memmem/shared/mask_32.rs +++ b/crates/rsonpath-lib/src/classification/memmem/shared/mask_32.rs @@ -1,13 +1,16 @@ -use crate::input::{ - error::{InputError, InputErrorConvertible}, - Input, +use crate::{ + debug, + input::{ + error::{InputError, InputErrorConvertible}, + Input, + }, + string_pattern::StringPattern, }; -use rsonpath_syntax::str::JsonString; #[inline(always)] pub(crate) fn find_in_mask( input: &I, - label: &JsonString, + label: &StringPattern, previous_block: u32, first: u32, second: u32, diff --git a/crates/rsonpath-lib/src/classification/memmem/shared/mask_64.rs b/crates/rsonpath-lib/src/classification/memmem/shared/mask_64.rs index 72373e7b..97c2f7f5 100644 --- a/crates/rsonpath-lib/src/classification/memmem/shared/mask_64.rs +++ b/crates/rsonpath-lib/src/classification/memmem/shared/mask_64.rs @@ -4,13 +4,13 @@ use crate::{ error::{InputError, InputErrorConvertible}, Input, }, + string_pattern::StringPattern, }; -use rsonpath_syntax::str::JsonString; #[inline(always)] pub(crate) fn find_in_mask( input: &I, - label: &JsonString, + label: &StringPattern, previous_block: u64, first: u64, second: u64, diff --git a/crates/rsonpath-lib/src/classification/memmem/sse2_32.rs b/crates/rsonpath-lib/src/classification/memmem/sse2_32.rs index 02d7677d..3df64a94 100644 --- a/crates/rsonpath-lib/src/classification/memmem/sse2_32.rs +++ b/crates/rsonpath-lib/src/classification/memmem/sse2_32.rs @@ -54,7 +54,7 @@ where #[inline(always)] unsafe fn find_empty( &mut self, - label: &JsonString, + label: &StringPattern, mut offset: usize, ) -> Result)>, InputError> { let classifier = vector_128::BlockClassifier128::new(b'"', b'"'); @@ -94,10 +94,10 @@ where #[inline(always)] unsafe fn find_letter( &mut self, - label: &JsonString, + label: &StringPattern, mut offset: usize, ) -> Result)>, InputError> { - let classifier = vector_128::BlockClassifier128::new(label.unquoted().as_bytes()[0], b'"'); + let classifier = vector_128::BlockClassifier128::new(label.unquoted()[0], b'"'); let mut previous_block: u32 = 0; while let Some(block) = self.iter.next().e()? { @@ -124,7 +124,7 @@ where #[inline(always)] unsafe fn find_label_sse2( &mut self, - label: &JsonString, + label: &StringPattern, mut offset: usize, ) -> Result)>, InputError> { if label.unquoted().is_empty() { @@ -133,8 +133,7 @@ where return self.find_letter(label, offset); } - let classifier = - vector_128::BlockClassifier128::new(label.unquoted().as_bytes()[0], label.unquoted().as_bytes()[1]); + let classifier = vector_128::BlockClassifier128::new(label.unquoted()[0], label.unquoted()[1]); let mut previous_block: u32 = 0; while let Some(block) = self.iter.next().e()? { @@ -170,7 +169,7 @@ where &mut self, first_block: Option>, start_idx: usize, - label: &JsonString, + label: &StringPattern, ) -> Result)>, InputError> { if let Some(b) = first_block { if let Some(res) = shared::find_label_in_first_block(self.input, b, start_idx, label)? { diff --git a/crates/rsonpath-lib/src/classification/memmem/sse2_64.rs b/crates/rsonpath-lib/src/classification/memmem/sse2_64.rs index af177e4c..26775e35 100644 --- a/crates/rsonpath-lib/src/classification/memmem/sse2_64.rs +++ b/crates/rsonpath-lib/src/classification/memmem/sse2_64.rs @@ -54,7 +54,7 @@ where #[inline(always)] unsafe fn find_empty( &mut self, - label: &JsonString, + label: &StringPattern, mut offset: usize, ) -> Result)>, InputError> { let classifier = vector_128::BlockClassifier128::new(b'"', b'"'); @@ -106,10 +106,10 @@ where #[inline(always)] unsafe fn find_letter( &mut self, - label: &JsonString, + label: &StringPattern, mut offset: usize, ) -> Result)>, InputError> { - let classifier = vector_128::BlockClassifier128::new(label.unquoted().as_bytes()[0], b'"'); + let classifier = vector_128::BlockClassifier128::new(label.unquoted()[0], b'"'); let mut previous_block: u64 = 0; while let Some(block) = self.iter.next().e()? { @@ -148,7 +148,7 @@ where #[inline(always)] unsafe fn find_label_sse2( &mut self, - label: &JsonString, + label: &StringPattern, mut offset: usize, ) -> Result)>, InputError> { if label.unquoted().is_empty() { @@ -157,8 +157,7 @@ where return self.find_letter(label, offset); } - let classifier = - vector_128::BlockClassifier128::new(label.unquoted().as_bytes()[0], label.unquoted().as_bytes()[1]); + let classifier = vector_128::BlockClassifier128::new(label.unquoted()[0], label.unquoted()[1]); let mut previous_block: u64 = 0; while let Some(block) = self.iter.next().e()? { @@ -206,7 +205,7 @@ where &mut self, first_block: Option>, start_idx: usize, - label: &JsonString, + label: &StringPattern, ) -> Result)>, InputError> { if let Some(b) = first_block { if let Some(res) = shared::find_label_in_first_block(self.input, b, start_idx, label)? { diff --git a/crates/rsonpath-lib/src/engine.rs b/crates/rsonpath-lib/src/engine.rs index d8456b9a..a2abde6c 100644 --- a/crates/rsonpath-lib/src/engine.rs +++ b/crates/rsonpath-lib/src/engine.rs @@ -101,15 +101,15 @@ pub trait Engine { pub trait Compiler { /// Concrete type of the [`Engines`](`Engine`) created, /// parameterized with the lifetime of the input query. - type E<'q>: Engine + 'q; + type E: Engine; /// Compile a [`JsonPathQuery`] into an [`Engine`].c /// /// # Errors /// An appropriate [`CompilerError`] is returned if the compiler /// cannot handle the query. - fn compile_query(query: &JsonPathQuery) -> Result, CompilerError>; + fn compile_query(query: &JsonPathQuery) -> Result; /// Turn a compiled [`Automaton`] into an [`Engine`]. - fn from_compiled_query(automaton: Automaton<'_>) -> Self::E<'_>; + fn from_compiled_query(automaton: Automaton) -> Self::E; } diff --git a/crates/rsonpath-lib/src/engine/head_skipping.rs b/crates/rsonpath-lib/src/engine/head_skipping.rs index 5bfacc0e..55fccc8e 100644 --- a/crates/rsonpath-lib/src/engine/head_skipping.rs +++ b/crates/rsonpath-lib/src/engine/head_skipping.rs @@ -1,6 +1,8 @@ //! Engine decorator that performs **head skipping** – an extremely optimized search for //! the first matching member name in a query starting with a self-looping state. //! This happens in queries starting with a descendant selector. +use std::rc::Rc; + use crate::{ automaton::{Automaton, State}, classification::{ @@ -19,9 +21,9 @@ use crate::{ Input, InputBlockIterator, }, result::Recorder, + string_pattern::StringPattern, FallibleIterator, MaskType, BLOCK_SIZE, }; -use rsonpath_syntax::str::JsonString; /// Trait that needs to be implemented by an [`Engine`](`super::Engine`) to use this submodule. pub(super) trait CanHeadSkip<'i, 'r, I, R, V> @@ -61,15 +63,15 @@ where V: Simd; /// Configuration of the head-skipping decorator. -pub(super) struct HeadSkip<'b, 'q, I, V, const N: usize> { +pub(super) struct HeadSkip<'b, I, V, const N: usize> { bytes: &'b I, state: State, is_accepting: bool, - member_name: &'q JsonString, + member_name: Rc, simd: V, } -impl<'b, 'q, I: Input, V: Simd> HeadSkip<'b, 'q, I, V, BLOCK_SIZE> { +impl<'b, I: Input, V: Simd> HeadSkip<'b, I, V, BLOCK_SIZE> { /// Create a new instance of the head-skipping decorator over a given input /// and for a compiled query [`Automaton`]. /// @@ -92,7 +94,7 @@ impl<'b, 'q, I: Input, V: Simd> HeadSkip<'b, 'q, I, V, BLOCK_SIZE> { /// extremely quickly with [`classification::memmem`](crate::classification::memmem). /// /// In all other cases, head-skipping is not supported. - pub(super) fn new(bytes: &'b I, automaton: &'b Automaton<'q>, simd: V) -> Option { + pub(super) fn new(bytes: &'b I, automaton: &Automaton, simd: V) -> Option { let initial_state = automaton.initial_state(); let fallback_state = automaton[initial_state].fallback_state(); let transitions = automaton[initial_state].member_transitions(); @@ -101,13 +103,13 @@ impl<'b, 'q, I: Input, V: Simd> HeadSkip<'b, 'q, I, V, BLOCK_SIZE> { && transitions.len() == 1 && automaton[initial_state].array_transitions().is_empty() { - let (member_name, target_state) = transitions[0]; + let (member_name, target_state) = &transitions[0]; debug!("Automaton starts with a descendant search, using memmem heuristic."); return Some(Self { bytes, - state: target_state, - is_accepting: automaton.is_accepting(target_state), - member_name, + state: *target_state, + is_accepting: automaton.is_accepting(*target_state), + member_name: member_name.clone(), simd, }); } @@ -124,7 +126,7 @@ impl<'b, 'q, I: Input, V: Simd> HeadSkip<'b, 'q, I, V, BLOCK_SIZE> { R: Recorder> + 'r, { dispatch_simd!(self.simd; self, engine => - fn<'b, 'q, 'r, I, V, E, R>(head_skip: &HeadSkip<'b, 'q, I, V, BLOCK_SIZE>, engine: &mut E) -> Result<(), EngineError> + fn<'b, 'r, I, V, E, R>(head_skip: &HeadSkip<'b, I, V, BLOCK_SIZE>, engine: &mut E) -> Result<(), EngineError> where 'b: 'r, E: CanHeadSkip<'b, 'r, I, R, V>, @@ -140,7 +142,7 @@ impl<'b, 'q, I: Input, V: Simd> HeadSkip<'b, 'q, I, V, BLOCK_SIZE> { let mut memmem = head_skip.simd.memmem(head_skip.bytes, &mut input_iter); debug!("Starting memmem search from {idx}"); - if let Some((starting_quote_idx, last_block)) = memmem.find_label(first_block, idx, head_skip.member_name)? { + if let Some((starting_quote_idx, last_block)) = memmem.find_label(first_block, idx, head_skip.member_name.as_ref())? { drop(memmem); first_block = Some(last_block); diff --git a/crates/rsonpath-lib/src/engine/main.rs b/crates/rsonpath-lib/src/engine/main.rs index d5e60509..77dfb103 100644 --- a/crates/rsonpath-lib/src/engine/main.rs +++ b/crates/rsonpath-lib/src/engine/main.rs @@ -59,38 +59,39 @@ use crate::{ approx_span::ApproxSpanRecorder, count::CountRecorder, index::IndexRecorder, nodes::NodesRecorder, Match, MatchCount, MatchIndex, MatchSpan, MatchedNodeType, Recorder, Sink, }, + string_pattern::StringPattern, FallibleIterator, MaskType, BLOCK_SIZE, }; -use rsonpath_syntax::{num::JsonUInt, str::JsonString, JsonPathQuery}; +use rsonpath_syntax::{num::JsonUInt, JsonPathQuery}; use smallvec::{smallvec, SmallVec}; /// Main engine for a fixed JSONPath query. /// /// The engine is stateless, meaning that it can be executed /// on any number of separate inputs, even on separate threads. -pub struct MainEngine<'q> { - automaton: Automaton<'q>, +pub struct MainEngine { + automaton: Automaton, simd: SimdConfiguration, } -impl Compiler for MainEngine<'_> { - type E<'q> = MainEngine<'q>; +impl Compiler for MainEngine { + type E = Self; #[must_use = "compiling the query only creates an engine instance that should be used"] #[inline(always)] - fn compile_query(query: &JsonPathQuery) -> Result { + fn compile_query(query: &JsonPathQuery) -> Result { let automaton = Automaton::new(query)?; debug!("DFA:\n {}", automaton); let simd = simd::configure(); log::info!("SIMD configuration:\n {}", simd); - Ok(MainEngine { automaton, simd }) + Ok(Self { automaton, simd }) } #[inline(always)] - fn from_compiled_query(automaton: Automaton<'_>) -> Self::E<'_> { + fn from_compiled_query(automaton: Automaton) -> Self::E { let simd = simd::configure(); log::info!("SIMD configuration:\n {}", simd); - MainEngine { automaton, simd } + Self { automaton, simd } } } @@ -104,7 +105,7 @@ impl Compiler for MainEngine<'_> { * - we set up an appropriate Recorder impl for the result type. * - we configure SIMD and run the Executor in its context. */ -impl Engine for MainEngine<'_> { +impl Engine for MainEngine { #[inline] fn count(&self, input: &I) -> Result where @@ -209,7 +210,7 @@ macro_rules! Classifier { } /// This is the heart of an Engine run that holds the entire execution state. -struct Executor<'i, 'q, 'r, I, R, V> { +struct Executor<'i, 'r, I, R, V> { /// Current depth in the JSON tree. depth: Depth, /// Current automaton state. @@ -224,7 +225,7 @@ struct Executor<'i, 'q, 'r, I, R, V> { /// Execution stack. stack: SmallStack, /// Read-only access to the query automaton. - automaton: &'i Automaton<'q>, + automaton: &'i Automaton, /// Handle to the input. input: &'i I, /// Handle to the recorder. @@ -234,12 +235,12 @@ struct Executor<'i, 'q, 'r, I, R, V> { } /// Initialize the [`Executor`] for the initial state of a query. -fn query_executor<'i, 'q, 'r, I, R, V: Simd>( - automaton: &'i Automaton<'q>, +fn query_executor<'i, 'r, I, R, V: Simd>( + automaton: &'i Automaton, input: &'i I, recorder: &'r R, simd: V, -) -> Executor<'i, 'q, 'r, I, R, V> +) -> Executor<'i, 'r, I, R, V> where I: Input, R: Recorder>, @@ -258,7 +259,7 @@ where } } -impl<'i, 'r, I, R, V> Executor<'i, '_, 'r, I, R, V> +impl<'i, 'r, I, R, V> Executor<'i, 'r, I, R, V> where 'i: 'r, I: Input, @@ -295,7 +296,7 @@ where /// Once the perceived depth of the document goes to zero, this method terminates. fn run_on_subtree(&mut self, classifier: &mut Classifier!()) -> Result<(), EngineError> { dispatch_simd!(self.simd; self, classifier => - fn<'i, 'q, 'r, I, R, V>(eng: &mut Executor<'i, 'q, 'r, I, R, V>, classifier: &mut Classifier!()) -> Result<(), EngineError> + fn<'i, 'r, I, R, V>(eng: &mut Executor<'i, 'r, I, R, V>, classifier: &mut Classifier!()) -> Result<(), EngineError> where 'i: 'r, I: Input, @@ -362,8 +363,8 @@ where // Look at accepting transitions and try to match them with the label. let mut any_matched = false; - for &(member_name, target) in self.automaton[self.state].member_transitions() { - if self.automaton.is_accepting(target) && self.is_match(idx, member_name)? { + for (member_name, target) in self.automaton[self.state].member_transitions() { + if self.automaton.is_accepting(*target) && self.is_match(idx, member_name.as_ref())? { self.record_match_detected_at(idx + 1, NodeType::Atomic)?; any_matched = true; break; @@ -470,12 +471,12 @@ where } else { let colon_idx = self.find_preceding_colon(idx); - for &(member_name, target) in self.automaton[self.state].member_transitions() { + for (member_name, target) in self.automaton[self.state].member_transitions() { if let Some(colon_idx) = colon_idx { - if self.is_match(colon_idx, member_name)? { + if self.is_match(colon_idx, member_name.as_ref())? { any_matched = true; - self.transition_to(target, bracket_type); - if self.automaton.is_accepting(target) { + self.transition_to(*target, bracket_type); + if self.automaton.is_accepting(*target) { debug!("Accept {idx}"); self.record_match_detected_at(colon_idx + 1, NodeType::Complex(bracket_type))?; } @@ -653,7 +654,7 @@ where /// Check if the label ended with a colon at index `idx` matches the `member_name`. #[inline(always)] - fn is_match(&self, idx: usize, member_name: &JsonString) -> Result { + fn is_match(&self, idx: usize, member_name: &StringPattern) -> Result { let len = member_name.quoted().len(); // The colon can be preceded by whitespace before the actual label. @@ -751,7 +752,7 @@ impl SmallStack { } } -impl<'i, 'r, I, R, V> CanHeadSkip<'i, 'r, I, R, V> for Executor<'i, '_, 'r, I, R, V> +impl<'i, 'r, I, R, V> CanHeadSkip<'i, 'r, I, R, V> for Executor<'i, 'r, I, R, V> where I: Input, R: Recorder>, diff --git a/crates/rsonpath-lib/src/input.rs b/crates/rsonpath-lib/src/input.rs index b8d50fa3..05a022ec 100644 --- a/crates/rsonpath-lib/src/input.rs +++ b/crates/rsonpath-lib/src/input.rs @@ -26,8 +26,7 @@ pub use mmap::MmapInput; pub use owned::OwnedBytes; use self::error::InputError; -use crate::result::InputRecorder; -use rsonpath_syntax::str::JsonString; +use crate::{result::InputRecorder, string_pattern::StringPattern}; use std::ops::Deref; /// Make the struct repr(C) with alignment equal to [`MAX_BLOCK_SIZE`]. @@ -151,7 +150,7 @@ pub trait Input: Sized { /// # Errors /// This function can read more data from the input if `to` falls beyond /// the range that was already read, and the read operation can fail. - fn is_member_match(&self, from: usize, to: usize, member: &JsonString) -> Result; + fn is_member_match(&self, from: usize, to: usize, member: &StringPattern) -> Result; } /// An iterator over blocks of input of size `N`. @@ -211,7 +210,7 @@ impl<'i, const N: usize> InputBlock<'i, N> for &'i [u8] { } pub(super) trait SliceSeekable { - fn is_member_match(&self, from: usize, to: usize, member: &JsonString) -> bool; + fn is_member_match(&self, from: usize, to: usize, member: &StringPattern) -> bool; fn seek_backward(&self, from: usize, needle: u8) -> Option; diff --git a/crates/rsonpath-lib/src/input/borrowed.rs b/crates/rsonpath-lib/src/input/borrowed.rs index 58255338..70c77796 100644 --- a/crates/rsonpath-lib/src/input/borrowed.rs +++ b/crates/rsonpath-lib/src/input/borrowed.rs @@ -18,8 +18,7 @@ use super::{ padding::{EndPaddedInput, PaddedBlock, TwoSidesPaddedInput}, Input, InputBlockIterator, SliceSeekable, MAX_BLOCK_SIZE, }; -use crate::{debug, result::InputRecorder}; -use rsonpath_syntax::str::JsonString; +use crate::{debug, result::InputRecorder, string_pattern::StringPattern}; /// Input wrapping a borrowed [`[u8]`] buffer. pub struct BorrowedBytes<'a> { @@ -220,7 +219,7 @@ impl Input for BorrowedBytes<'_> { } #[inline(always)] - fn is_member_match(&self, from: usize, to: usize, member: &JsonString) -> Result { + fn is_member_match(&self, from: usize, to: usize, member: &StringPattern) -> Result { debug_assert!(from < to); // The hot path is when we're checking fully within the middle section. // This has to be as fast as possible, so the "cold" path referring to the TwoSidesPaddedInput @@ -231,7 +230,7 @@ impl Input for BorrowedBytes<'_> { let from = from - MAX_BLOCK_SIZE; let to = to - MAX_BLOCK_SIZE; let slice = &bytes[from..to]; - Ok(member.quoted().as_bytes() == slice && (from == 0 || bytes[from - 1] != b'\\')) + Ok(member.quoted() == slice && (from == 0 || bytes[from - 1] != b'\\')) } else { // This is a very expensive, cold path. Ok(self.as_padded_input().is_member_match(from, to, member)) diff --git a/crates/rsonpath-lib/src/input/buffered.rs b/crates/rsonpath-lib/src/input/buffered.rs index 04351ad8..f764ed6d 100644 --- a/crates/rsonpath-lib/src/input/buffered.rs +++ b/crates/rsonpath-lib/src/input/buffered.rs @@ -19,8 +19,7 @@ use super::{ error::InputError, repr_align_block_size, Input, InputBlock, InputBlockIterator, SliceSeekable, MAX_BLOCK_SIZE, }; -use crate::{error::InternalRsonpathError, result::InputRecorder, JSON_SPACE_BYTE}; -use rsonpath_syntax::str::JsonString; +use crate::{error::InternalRsonpathError, result::InputRecorder, string_pattern::StringPattern, JSON_SPACE_BYTE}; use std::{cell::RefCell, io::Read, ops::Deref, slice}; // The buffer has to be a multiple of MAX_BLOCK_SIZE. @@ -219,7 +218,7 @@ impl Input for BufferedInput { } #[inline(always)] - fn is_member_match(&self, from: usize, to: usize, member: &JsonString) -> Result { + fn is_member_match(&self, from: usize, to: usize, member: &StringPattern) -> Result { let mut buf = self.0.borrow_mut(); while buf.len() < to { @@ -230,7 +229,7 @@ impl Input for BufferedInput { let bytes = buf.as_slice(); let slice = &bytes[from..to]; - Ok(member.quoted().as_bytes() == slice && (from == 0 || bytes[from - 1] != b'\\')) + Ok(member.quoted() == slice && (from == 0 || bytes[from - 1] != b'\\')) } } diff --git a/crates/rsonpath-lib/src/input/mmap.rs b/crates/rsonpath-lib/src/input/mmap.rs index 5061b099..b7318b97 100644 --- a/crates/rsonpath-lib/src/input/mmap.rs +++ b/crates/rsonpath-lib/src/input/mmap.rs @@ -21,9 +21,8 @@ use super::{ padding::PaddedBlock, Input, SliceSeekable, MAX_BLOCK_SIZE, }; -use crate::{input::padding::EndPaddedInput, result::InputRecorder}; +use crate::{input::padding::EndPaddedInput, result::InputRecorder, string_pattern::StringPattern}; use memmap2::{Mmap, MmapAsRawDesc}; -use rsonpath_syntax::str::JsonString; /// Input wrapping a memory mapped file. pub struct MmapInput { @@ -162,7 +161,7 @@ impl Input for MmapInput { } #[inline] - fn is_member_match(&self, from: usize, to: usize, member: &JsonString) -> Result { + fn is_member_match(&self, from: usize, to: usize, member: &StringPattern) -> Result { debug_assert!(from < to); // The hot path is when we're checking fully within the middle section. // This has to be as fast as possible, so the "cold" path referring to the TwoSidesPaddedInput @@ -171,7 +170,7 @@ impl Input for MmapInput { // This is the hot path -- do the bounds check and memcmp. let bytes = &self.mmap; let slice = &bytes[from..to]; - Ok(member.quoted().as_bytes() == slice && (from == 0 || bytes[from - 1] != b'\\')) + Ok(member.quoted() == slice && (from == 0 || bytes[from - 1] != b'\\')) } else { // This is a very expensive, cold path. Ok(self.as_padded_input().is_member_match(from, to, member)) diff --git a/crates/rsonpath-lib/src/input/owned.rs b/crates/rsonpath-lib/src/input/owned.rs index dbc83205..b82d4ea0 100644 --- a/crates/rsonpath-lib/src/input/owned.rs +++ b/crates/rsonpath-lib/src/input/owned.rs @@ -24,8 +24,7 @@ use super::{ padding::{PaddedBlock, TwoSidesPaddedInput}, Input, SliceSeekable, MAX_BLOCK_SIZE, }; -use crate::result::InputRecorder; -use rsonpath_syntax::str::JsonString; +use crate::{result::InputRecorder, string_pattern::StringPattern}; use std::borrow::Borrow; /// Input wrapping a buffer borrowable as a slice of bytes. @@ -160,7 +159,7 @@ where } #[inline] - fn is_member_match(&self, from: usize, to: usize, member: &JsonString) -> Result { + fn is_member_match(&self, from: usize, to: usize, member: &StringPattern) -> Result { let offset = self.leading_padding_len(); let Some(from) = from.checked_sub(offset) else { return Ok(false); diff --git a/crates/rsonpath-lib/src/input/padding.rs b/crates/rsonpath-lib/src/input/padding.rs index 1b4eb8f7..93d8363c 100644 --- a/crates/rsonpath-lib/src/input/padding.rs +++ b/crates/rsonpath-lib/src/input/padding.rs @@ -1,6 +1,5 @@ use super::{SliceSeekable, MAX_BLOCK_SIZE}; -use crate::JSON_SPACE_BYTE; -use rsonpath_syntax::str::JsonString; +use crate::{string_pattern::StringPattern, JSON_SPACE_BYTE}; pub(super) struct PaddedBlock { bytes: [u8; MAX_BLOCK_SIZE], @@ -102,9 +101,9 @@ impl SliceSeekable for EndPaddedInput<'_> { #[cold] #[inline(never)] - fn is_member_match(&self, from: usize, to: usize, member: &JsonString) -> bool { + fn is_member_match(&self, from: usize, to: usize, member: &StringPattern) -> bool { debug_assert!(from < to); - let other = member.quoted().as_bytes(); + let other = member.quoted(); self.cold_member_match(other, from, to) } } @@ -160,9 +159,9 @@ impl SliceSeekable for TwoSidesPaddedInput<'_> { #[cold] #[inline(never)] - fn is_member_match(&self, from: usize, to: usize, member: &JsonString) -> bool { + fn is_member_match(&self, from: usize, to: usize, member: &StringPattern) -> bool { debug_assert!(from < to); - let other = member.quoted().as_bytes(); + let other = member.quoted(); self.cold_member_match(other, from, to) } } diff --git a/crates/rsonpath-lib/src/input/slice.rs b/crates/rsonpath-lib/src/input/slice.rs index 5544c92e..ca369008 100644 --- a/crates/rsonpath-lib/src/input/slice.rs +++ b/crates/rsonpath-lib/src/input/slice.rs @@ -1,15 +1,15 @@ use super::SliceSeekable; -use rsonpath_syntax::str::JsonString; +use crate::string_pattern::StringPattern; impl> SliceSeekable for T { - fn is_member_match(&self, from: usize, to: usize, member: &JsonString) -> bool { + fn is_member_match(&self, from: usize, to: usize, member: &StringPattern) -> bool { let bytes = self.as_ref(); if to > bytes.len() { return false; } let slice = &bytes[from..to]; - member.quoted().as_bytes() == slice && (from == 0 || bytes[from - 1] != b'\\') + member.quoted() == slice && (from == 0 || bytes[from - 1] != b'\\') } fn seek_backward(&self, from: usize, needle: u8) -> Option { @@ -310,6 +310,7 @@ mod tests { mod is_member_match { use crate::input::SliceSeekable; + use crate::string_pattern::StringPattern; use pretty_assertions::assert_eq; use rsonpath_syntax::str::JsonString; @@ -317,7 +318,7 @@ mod tests { fn on_exact_match_returns_true() { let bytes = r#"{"needle":42,"other":37}"#.as_bytes(); - let result = bytes.is_member_match(1, 9, &JsonString::new("needle")); + let result = bytes.is_member_match(1, 9, &StringPattern::new(&JsonString::new("needle"))); assert_eq!(result, true); } @@ -326,7 +327,7 @@ mod tests { fn matching_without_double_quotes_returns_false() { let bytes = r#"{"needle":42,"other":37}"#.as_bytes(); - let result = bytes.is_member_match(2, 8, &JsonString::new("needle")); + let result = bytes.is_member_match(2, 8, &StringPattern::new(&JsonString::new("needle"))); assert_eq!(result, false); } @@ -335,7 +336,7 @@ mod tests { fn when_match_is_partial_due_to_escaped_double_quote_returns_false() { let bytes = r#"{"fake\"needle":42,"other":37}"#.as_bytes(); - let result = bytes.is_member_match(7, 15, &JsonString::new("needle")); + let result = bytes.is_member_match(7, 15, &StringPattern::new(&JsonString::new("needle"))); assert_eq!(result, false); } @@ -345,7 +346,7 @@ mod tests { fn when_looking_for_string_with_escaped_double_quote_returns_true() { let bytes = r#"{"fake\"needle":42,"other":37}"#.as_bytes(); - let result = bytes.is_member_match(1, 15, &JsonString::new(r#"fake"needle"#)); + let result = bytes.is_member_match(1, 15, &StringPattern::new(&JsonString::new(r#"fake"needle"#))); assert_eq!(result, true); } diff --git a/crates/rsonpath-lib/src/lib.rs b/crates/rsonpath-lib/src/lib.rs index f2784c13..0b7538b6 100644 --- a/crates/rsonpath-lib/src/lib.rs +++ b/crates/rsonpath-lib/src/lib.rs @@ -197,6 +197,9 @@ pub mod engine; pub mod error; pub mod input; pub mod result; +pub(crate) mod string_pattern; + +pub use string_pattern::StringPattern; cfg_if::cfg_if! { if #[cfg(target_pointer_width = "32")] { diff --git a/crates/rsonpath-lib/src/string_pattern.rs b/crates/rsonpath-lib/src/string_pattern.rs new file mode 100644 index 00000000..c4957efe --- /dev/null +++ b/crates/rsonpath-lib/src/string_pattern.rs @@ -0,0 +1,74 @@ +use rsonpath_syntax::str::JsonString; + +/// String pattern coming from a JSONPath query that can be matched against strings in a JSON. +/// +/// Right now the only pattern is matching against a given [`JsonString`]. +#[derive(Debug, Clone)] +pub struct StringPattern(JsonString); + +impl std::hash::Hash for StringPattern { + #[inline] + fn hash(&self, state: &mut H) { + self.0.hash(state); + } +} + +impl PartialOrd for StringPattern { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.0.unquoted().cmp(other.0.unquoted())) + } +} + +impl Ord for StringPattern { + #[inline] + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.0.unquoted().cmp(other.0.unquoted()) + } +} + +impl PartialEq for StringPattern { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +impl Eq for StringPattern {} + +impl StringPattern { + /// Get the underlying [`JsonString`] as bytes, including the delimiting double quote symbols. + #[inline] + #[must_use] + pub fn quoted(&self) -> &[u8] { + self.0.quoted().as_bytes() + } + + /// Get the underlying [`JsonString`] as bytes, without the delimiting quotes. + #[inline] + #[must_use] + pub fn unquoted(&self) -> &[u8] { + self.0.unquoted().as_bytes() + } + + /// Create a new pattern from a given [`JsonString`]. + #[inline] + #[must_use] + pub fn new(string: &JsonString) -> Self { + Self(string.clone()) + } +} + +impl From for StringPattern { + #[inline(always)] + fn from(value: JsonString) -> Self { + Self::new(&value) + } +} + +impl From<&JsonString> for StringPattern { + #[inline(always)] + fn from(value: &JsonString) -> Self { + Self::new(value) + } +} diff --git a/crates/rsonpath-lib/tests/input_implementation_tests.rs b/crates/rsonpath-lib/tests/input_implementation_tests.rs index 8b203fa6..308426c4 100644 --- a/crates/rsonpath-lib/tests/input_implementation_tests.rs +++ b/crates/rsonpath-lib/tests/input_implementation_tests.rs @@ -2,8 +2,8 @@ use pretty_assertions::assert_eq; use rsonpath::{ input::{error::InputError, *}, result::empty::EmptyRecorder, + StringPattern, }; -use rsonpath_syntax::str::JsonString; use std::{cmp, fs, fs::File, io::Read, iter}; use test_case::test_case; @@ -143,7 +143,7 @@ impl InMemoryTestInput { } } - fn test_positive_is_member_match(&self, bytes: &[u8], from: usize, to: usize, json_string: JsonString) { + fn test_positive_is_member_match(&self, bytes: &[u8], from: usize, to: usize, json_string: StringPattern) { match self { InMemoryTestInput::Buffered => Self::test_positive_is_member_match_buffered(bytes, from, to, json_string), InMemoryTestInput::Borrowed => Self::test_positive_is_member_match_borrowed(bytes, from, to, json_string), @@ -269,7 +269,7 @@ impl InMemoryTestInput { assert_eq!(result, Some((expected, expected_byte))); } - fn test_positive_is_member_match_buffered(bytes: &[u8], from: usize, to: usize, json_string: JsonString) { + fn test_positive_is_member_match_buffered(bytes: &[u8], from: usize, to: usize, json_string: StringPattern) { let input = create_buffered(bytes); let result = input.is_member_match(from, to, &json_string).expect("match succeeds"); @@ -278,7 +278,7 @@ impl InMemoryTestInput { assert!(result); } - fn test_positive_is_member_match_borrowed(bytes: &[u8], from: usize, to: usize, json_string: JsonString) { + fn test_positive_is_member_match_borrowed(bytes: &[u8], from: usize, to: usize, json_string: StringPattern) { let input = BorrowedBytes::new(bytes); // Need to take padding into account. @@ -289,7 +289,7 @@ impl InMemoryTestInput { assert!(result); } - fn test_positive_is_member_match_owned(bytes: &[u8], from: usize, to: usize, json_string: JsonString) { + fn test_positive_is_member_match_owned(bytes: &[u8], from: usize, to: usize, json_string: StringPattern) { let input = OwnedBytes::new(bytes); // Need to take padding into account. @@ -417,6 +417,7 @@ impl Read for ReadBytes<'_> { mod in_memory_proptests { use crate::InMemoryTestInput; use proptest::prelude::*; + use rsonpath::StringPattern; use rsonpath_syntax::str::JsonString; const JSON_WHITESPACE_BYTES: [u8; 4] = [b' ', b'\t', b'\n', b'\r']; @@ -579,7 +580,7 @@ mod in_memory_proptests { prop_compose! { fn positive_is_member_match_strategy() (input in prop::collection::vec(prop::num::u8::ANY, 2..1024)) - (mut from in 0..input.len(), mut to in 0..input.len(), mut input in Just(input)) -> (Vec, usize, usize, JsonString) + (mut from in 0..input.len(), mut to in 0..input.len(), mut input in Just(input)) -> (Vec, usize, usize, StringPattern) { if from > to { std::mem::swap(&mut from, &mut to); @@ -609,6 +610,7 @@ mod in_memory_proptests { let str = "x".repeat(to - from - 2); let json_string = JsonString::new(&str); + let pattern = StringPattern::new(&json_string); let slice = &mut input[from..to]; slice.copy_from_slice(json_string.quoted().as_bytes()); @@ -617,7 +619,7 @@ mod in_memory_proptests { input[from - 1] = 255; } - (input, from, to, json_string) + (input, from, to, pattern) } } } diff --git a/crates/rsonpath/src/runner.rs b/crates/rsonpath/src/runner.rs index 8716e788..1330ccbb 100644 --- a/crates/rsonpath/src/runner.rs +++ b/crates/rsonpath/src/runner.rs @@ -17,14 +17,14 @@ use std::{ path::Path, }; -pub struct Runner<'q, S> { - pub with_compiled_query: Automaton<'q>, +pub struct Runner { + pub with_compiled_query: Automaton, pub with_engine: ResolvedEngine, pub with_input: ResolvedInput, pub with_output: ResolvedOutput, } -impl> Runner<'_, S> { +impl> Runner { pub fn run(self) -> Result<()> { match self.with_engine { ResolvedEngine::Main => {