Skip to content

Commit

Permalink
lib: added StringPattern and made Automaton no longer borrow the …
Browse files Browse the repository at this point in the history
…query

The `Automaton` struct borrowed the source query, which also caused the
Engine to carry the query's lifetime with it. The actual data being borrowed
were the `JsonString` values for member transitions.
In preparation for #117 we remove the borrowed `JsonString` and replace it
with `StringPattern`. For UTF-8 the `StringPattern` will be a more complex
struct that precomputes data needed for efficient matching later.
For now, it's a thin wrapper over a `JsonString`.

During construction we may create many transitions over the same pattern.
To reduce the size of the automaton we cache the patterns and put them
into an `Rc`. This may get optimised later to instead use some kind of
inline storage, but it's unlikely to actually matter. I ran the benchmarks
and saw no measurable difference between the previous version and this one.

Refs: #117 #613
  • Loading branch information
V0ldek committed Dec 22, 2024
1 parent 9a76de9 commit 8c0300e
Show file tree
Hide file tree
Showing 27 changed files with 402 additions and 333 deletions.
69 changes: 18 additions & 51 deletions crates/rsonpath-benchmarks/src/implementations/rsonpath.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
use crate::framework::implementation::Implementation;
use ouroboros::self_referencing;
use rsonpath::{engine::Compiler, input::MmapInput};
use rsonpath::{
engine::main::MainEngine,
engine::{main::MainEngine, Engine},
input::OwnedBytes,
result::{Match, Sink},
};
use rsonpath::{
engine::{Compiler, Engine},
input::MmapInput,
};
use rsonpath_syntax::JsonPathQuery;
use std::{convert::Infallible, fmt::Display, fs, io};
use thiserror::Error;

Expand All @@ -18,16 +13,8 @@ pub struct RsonpathCount {}
pub struct RsonpathMmap {}
pub struct RsonpathMmapCount {}

#[self_referencing()]
pub struct RsonpathQuery {
query: JsonPathQuery,
#[borrows(query)]
#[not_covariant]
engine: MainEngine<'this>,
}

impl Implementation for Rsonpath {
type Query = RsonpathQuery;
type Query = MainEngine;

type File = OwnedBytes<Vec<u8>>;

Expand All @@ -52,25 +39,20 @@ impl Implementation for Rsonpath {

fn compile_query(&self, query: &str) -> Result<Self::Query, Self::Error> {
let query = rsonpath_syntax::parse(query).unwrap();
let engine = MainEngine::compile_query(&query).map_err(RsonpathError::CompilerError)?;

let rsonpath = RsonpathQuery::try_new(query, |query| {
MainEngine::compile_query(query).map_err(RsonpathError::CompilerError)
})?;

Ok(rsonpath)
Ok(engine)
}

fn run(&self, query: &Self::Query, file: &Self::File) -> Result<Self::Result<'_>, Self::Error> {
query
.with_engine(|engine| engine.matches(file, &mut VoidSink))
.map_err(RsonpathError::EngineError)?;
query.matches(file, &mut VoidSink).map_err(RsonpathError::EngineError)?;

Ok("[not collected]")
}
}

impl Implementation for RsonpathCount {
type Query = RsonpathQuery;
type Query = MainEngine;

type File = OwnedBytes<Vec<u8>>;

Expand All @@ -95,25 +77,20 @@ impl Implementation for RsonpathCount {

fn compile_query(&self, query: &str) -> Result<Self::Query, Self::Error> {
let query = rsonpath_syntax::parse(query).unwrap();
let engine = MainEngine::compile_query(&query).map_err(RsonpathError::CompilerError)?;

let rsonpath = RsonpathQuery::try_new(query, |query| {
MainEngine::compile_query(query).map_err(RsonpathError::CompilerError)
})?;

Ok(rsonpath)
Ok(engine)
}

fn run(&self, query: &Self::Query, file: &Self::File) -> Result<Self::Result<'_>, Self::Error> {
query
.with_engine(|engine| engine.count(file))
.map_err(RsonpathError::EngineError)?;
query.count(file).map_err(RsonpathError::EngineError)?;

Ok("[not collected]")
}
}

impl Implementation for RsonpathMmap {
type Query = RsonpathQuery;
type Query = MainEngine;

type File = MmapInput;

Expand All @@ -138,25 +115,20 @@ impl Implementation for RsonpathMmap {

fn compile_query(&self, query: &str) -> Result<Self::Query, Self::Error> {
let query = rsonpath_syntax::parse(query).unwrap();
let engine = MainEngine::compile_query(&query).map_err(RsonpathError::CompilerError)?;

let rsonpath = RsonpathQuery::try_new(query, |query| {
MainEngine::compile_query(query).map_err(RsonpathError::CompilerError)
})?;

Ok(rsonpath)
Ok(engine)
}

fn run(&self, query: &Self::Query, file: &Self::File) -> Result<Self::Result<'_>, Self::Error> {
query
.with_engine(|engine| engine.matches(file, &mut VoidSink))
.map_err(RsonpathError::EngineError)?;
query.matches(file, &mut VoidSink).map_err(RsonpathError::EngineError)?;

Ok("[not collected]")
}
}

impl Implementation for RsonpathMmapCount {
type Query = RsonpathQuery;
type Query = MainEngine;

type File = MmapInput;

Expand All @@ -181,18 +153,13 @@ impl Implementation for RsonpathMmapCount {

fn compile_query(&self, query: &str) -> Result<Self::Query, Self::Error> {
let query = rsonpath_syntax::parse(query).unwrap();
let engine = MainEngine::compile_query(&query).map_err(RsonpathError::CompilerError)?;

let rsonpath = RsonpathQuery::try_new(query, |query| {
MainEngine::compile_query(query).map_err(RsonpathError::CompilerError)
})?;

Ok(rsonpath)
Ok(engine)
}

fn run(&self, query: &Self::Query, file: &Self::File) -> Result<Self::Result<'_>, Self::Error> {
query
.with_engine(|engine| engine.count(file))
.map_err(RsonpathError::EngineError)?;
query.count(file).map_err(RsonpathError::EngineError)?;

Ok("[not collected]")
}
Expand Down
49 changes: 27 additions & 22 deletions crates/rsonpath-lib/src/automaton.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,20 @@ mod state;

pub use state::{State, StateAttributes};

use crate::{automaton::error::CompilerError, debug};
use crate::{automaton::error::CompilerError, debug, string_pattern::StringPattern};
use nfa::NondeterministicAutomaton;
use rsonpath_syntax::{num::JsonUInt, str::JsonString, JsonPathQuery};
use rsonpath_syntax::{num::JsonUInt, JsonPathQuery};
use smallvec::SmallVec;
use std::{fmt::Display, ops::Index};
use std::{fmt::Display, ops::Index, rc::Rc};

/// A minimal, deterministic automaton representing a JSONPath query.
#[derive(Debug, PartialEq, Eq)]
pub struct Automaton<'q> {
states: Vec<StateTable<'q>>,
pub struct Automaton {
states: Vec<StateTable>,
}

/// Transition when a JSON member name matches a [`JsonString`]i.
pub type MemberTransition<'q> = (&'q JsonString, State);
/// Transition when a JSON member name matches a [`StringPattern`].
pub type MemberTransition = (Rc<StringPattern>, State);

/// Transition on elements of an array with indices specified by either a single index
/// or a simple slice expression.
Expand All @@ -45,9 +45,9 @@ pub(super) enum ArrayTransitionLabel {
/// Contains transitions triggered by matching member names or array indices, and a fallback transition
/// triggered when none of the labelled transitions match.
#[derive(Debug)]
pub struct StateTable<'q> {
pub struct StateTable {
attributes: StateAttributes,
member_transitions: SmallVec<[MemberTransition<'q>; 2]>,
member_transitions: SmallVec<[MemberTransition; 2]>,
array_transitions: SmallVec<[ArrayTransition; 2]>,
fallback_state: State,
}
Expand All @@ -59,7 +59,7 @@ pub(crate) struct SimpleSlice {
step: JsonUInt,
}

impl Default for StateTable<'_> {
impl Default for StateTable {
#[inline]
fn default() -> Self {
Self {
Expand All @@ -71,7 +71,7 @@ impl Default for StateTable<'_> {
}
}

impl PartialEq for StateTable<'_> {
impl PartialEq for StateTable {
#[inline]
fn eq(&self, other: &Self) -> bool {
return self.fallback_state == other.fallback_state
Expand All @@ -88,10 +88,10 @@ impl PartialEq for StateTable<'_> {
}
}

impl Eq for StateTable<'_> {}
impl Eq for StateTable {}

impl<'q> Index<State> for Automaton<'q> {
type Output = StateTable<'q>;
impl Index<State> for Automaton {
type Output = StateTable;

#[inline(always)]
fn index(&self, index: State) -> &Self::Output {
Expand Down Expand Up @@ -149,7 +149,7 @@ impl From<SimpleSlice> for ArrayTransitionLabel {
}
}

impl<'q> Automaton<'q> {
impl Automaton {
/// Convert a [`JsonPathQuery`] into a minimal deterministic automaton.
///
/// # Errors
Expand All @@ -158,10 +158,10 @@ impl<'q> Automaton<'q> {
/// - [`CompilerError::NotSupported`] raised if the query contains elements
/// not yet supported by the compiler.
#[inline]
pub fn new(query: &'q JsonPathQuery) -> Result<Self, CompilerError> {
pub fn new(query: &JsonPathQuery) -> Result<Self, CompilerError> {
let nfa = NondeterministicAutomaton::new(query)?;
debug!("NFA: {}", nfa);
Automaton::minimize(nfa)
Self::minimize(nfa)
}

/// Returns whether this automaton represents the select-root JSONPath query ('$').
Expand Down Expand Up @@ -389,12 +389,12 @@ impl<'q> Automaton<'q> {
self[state].attributes.is_unitary()
}

fn minimize(nfa: NondeterministicAutomaton<'q>) -> Result<Self, CompilerError> {
fn minimize(nfa: NondeterministicAutomaton) -> Result<Self, CompilerError> {
minimizer::minimize(nfa)
}
}

impl<'q> StateTable<'q> {
impl StateTable {
/// Returns the state to which a fallback transition leads.
///
/// A fallback transition is the catch-all transition triggered
Expand All @@ -421,7 +421,7 @@ impl<'q> StateTable<'q> {
/// to the contained [`State`].
#[must_use]
#[inline(always)]
pub fn member_transitions(&self) -> &[MemberTransition<'q>] {
pub fn member_transitions(&self) -> &[MemberTransition] {
&self.member_transitions
}
}
Expand All @@ -442,7 +442,7 @@ impl Display for ArrayTransitionLabel {
}
}

impl Display for Automaton<'_> {
impl Display for Automaton {
#[inline]
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
writeln!(f, "digraph {{")?;
Expand Down Expand Up @@ -503,7 +503,12 @@ impl Display for Automaton<'_> {
}
}
for (label, state) in &transitions.member_transitions {
writeln!(f, " {i} -> {} [label=\"{}\"]", state.0, label.unquoted())?
writeln!(
f,
" {i} -> {} [label=\"{}\"]",
state.0,
std::str::from_utf8(label.unquoted()).expect("labels to be valid utf8")
)?
}
writeln!(f, " {i} -> {} [label=\"*\"]", transitions.fallback_state.0)?;
}
Expand Down
Loading

0 comments on commit 8c0300e

Please sign in to comment.