Skip to content

Commit

Permalink
feat: index selector engine support (#132)
Browse files Browse the repository at this point in the history
- The automaton transition model has been changed to incorporate index-labelled transitions.
- Both engines now support queries with the index selector.

Ref: #132
  • Loading branch information
zwerdlds authored May 22, 2023
1 parent 953763a commit 2b250d6
Show file tree
Hide file tree
Showing 20 changed files with 851 additions and 274 deletions.
1 change: 1 addition & 0 deletions .envrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
use_nix
4 changes: 4 additions & 0 deletions Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ test-classifier:
test-engine:
cargo test --test engine_correctness_tests

# Run the query tests on default features.
test-parser:
cargo test --test query_parser_tests

# Run all tests, including real dataset tests, on the feature powerset of the project.
test-full:
-cargo install cargo-hack
Expand Down
1 change: 0 additions & 1 deletion crates/rsonpath-lib/src/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ pub mod main;
pub mod recursive;
#[cfg(feature = "tail-skip")]
mod tail_skipping;

pub use main::MainEngine as RsonpathEngine;

use self::error::EngineError;
Expand Down
18 changes: 11 additions & 7 deletions crates/rsonpath-lib/src/engine/head_skipping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,17 @@ impl<'b, 'q, I: Input> HeadSkip<'b, 'q, I, BLOCK_SIZE> {

if fallback_state == initial_state && transitions.len() == 1 {
let (label, target_state) = transitions[0];
debug!("Automaton starts with a descendant search, using memmem heuristic.");
return Some(Self {
bytes,
state: target_state,
is_accepting: automaton.is_accepting(target_state),
label,
});

if let Some(named_label) = label.get_label() {
debug!("Automaton starts with a descendant search, using memmem heuristic.");

return Some(Self {
bytes,
state: target_state,
is_accepting: automaton.is_accepting(target_state),
label: named_label,
});
}
}

None
Expand Down
175 changes: 137 additions & 38 deletions crates/rsonpath-lib/src/engine/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@ use super::head_skipping::{CanHeadSkip, HeadSkip};
use super::Compiler;
#[cfg(feature = "head-skip")]
use crate::classification::ResumeClassifierState;
use crate::classification::{
quotes::{classify_quoted_sequences, QuoteClassifiedIterator},
structural::{classify_structural_characters, BracketType, Structural, StructuralIterator},
};
use crate::debug;
use crate::engine::depth::Depth;
use crate::engine::error::EngineError;
Expand All @@ -24,9 +20,16 @@ use crate::engine::tail_skipping::TailSkip;
use crate::engine::{Engine, Input};
use crate::query::automaton::{Automaton, State};
use crate::query::error::CompilerError;
use crate::query::{JsonPathQuery, Label};
use crate::query::{JsonPathQuery, Label, NonNegativeArrayIndex};
use crate::result::QueryResult;
use crate::BLOCK_SIZE;
use crate::{
classification::{
quotes::{classify_quoted_sequences, QuoteClassifiedIterator},
structural::{classify_structural_characters, BracketType, Structural, StructuralIterator},
},
query::automaton::TransitionLabel,
};
use smallvec::{smallvec, SmallVec};

/// Main engine for a fixed JSONPath query.
Expand Down Expand Up @@ -102,6 +105,9 @@ struct Executor<'q, 'b, I: Input> {
bytes: &'b I,
next_event: Option<Structural>,
is_list: bool,
array_count: NonNegativeArrayIndex,
has_any_array_item_transition: bool,
has_any_array_item_transition_to_accepting: bool,
}

fn query_executor<'q, 'b, I: Input>(
Expand All @@ -116,6 +122,9 @@ fn query_executor<'q, 'b, I: Input>(
bytes,
next_event: None,
is_list: false,
array_count: NonNegativeArrayIndex::ZERO,
has_any_array_item_transition: false,
has_any_array_item_transition_to_accepting: false,
}
}

Expand Down Expand Up @@ -203,10 +212,15 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
let mut any_matched = false;

for &(label, target) in self.automaton[self.state].transitions() {
if self.automaton.is_accepting(target) && self.is_match(idx, label)? {
result.report(idx);
any_matched = true;
break;
match label {
TransitionLabel::ArrayIndex(_) => {}
TransitionLabel::ObjectMember(label) => {
if self.automaton.is_accepting(target) && self.is_match(idx, label)? {
result.report(idx);
any_matched = true;
break;
}
}
}
}
let fallback_state = self.automaton[self.state].fallback_state();
Expand Down Expand Up @@ -240,13 +254,32 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
R: QueryResult,
{
self.next_event = classifier.next();

let is_next_opening = self.next_event.map_or(false, |s| s.is_opening());

if !is_next_opening {
let fallback_state = self.automaton[self.state].fallback_state();
if self.is_list && self.automaton.is_accepting(fallback_state) {
result.report(idx);
}
let is_fallback_accepting = self
.automaton
.is_accepting(self.automaton[self.state].fallback_state());

if !is_next_opening && self.is_list && is_fallback_accepting {
debug!("Accepting on comma.");
result.report(idx);
}

// After wildcard, check for a matching array index.
// If the index increment exceeds the field's limit, give up.
if self.is_list && self.array_count.try_increment().is_err() {
return Ok(());
}
debug!("Incremented array count to {}", self.array_count);

let match_index = self
.automaton
.has_array_index_transition_to_accepting(self.state, &self.array_count);

if !is_next_opening && match_index {
debug!("Accepting on list item.");
result.report(idx);
}

Ok(())
Expand All @@ -267,15 +300,32 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
debug!("Opening {bracket_type:?}, increasing depth and pushing stack.",);
let mut any_matched = false;

if let Some(colon_idx) = self.find_preceding_colon(idx) {
for &(label, target) in self.automaton[self.state].transitions() {
if self.is_match(colon_idx, label)? {
any_matched = true;
self.transition_to(target, bracket_type);
if self.automaton.is_accepting(target) {
result.report(colon_idx);
let colon_idx = self.find_preceding_colon(idx);

for &(label, target) in self.automaton[self.state].transitions() {
match label {
TransitionLabel::ArrayIndex(i) => {
if self.is_list && i.eq(&self.array_count) {
any_matched = true;
self.transition_to(target, bracket_type);
if self.automaton.is_accepting(target) {
debug!("Accept {idx}");
result.report(idx);
}
break;
}
}
TransitionLabel::ObjectMember(label) => {
if let Some(colon_idx) = colon_idx {
if self.is_match(colon_idx, label)? {
any_matched = true;
self.transition_to(target, bracket_type);
if self.automaton.is_accepting(target) {
result.report(colon_idx);
}
break;
}
}
break;
}
}
}
Expand All @@ -301,29 +351,51 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {

if bracket_type == BracketType::Square {
self.is_list = true;
self.has_any_array_item_transition =
self.automaton.has_any_array_item_transition(self.state);
self.has_any_array_item_transition_to_accepting = self
.automaton
.has_any_array_item_transition_to_accepting(self.state);

let fallback = self.automaton[self.state].fallback_state();
if self.automaton.is_accepting(fallback) {
let is_fallback_accepting = self.automaton.is_accepting(fallback);

let searching_list = is_fallback_accepting || self.has_any_array_item_transition;

if searching_list {
classifier.turn_commas_on(idx);
self.next_event = classifier.next();
match self.next_event {
Some(Structural::Closing(_, close_idx)) => {
if let Some((next_idx, _)) = self.bytes.seek_non_whitespace_forward(idx + 1)
{
if next_idx < close_idx {
result.report(next_idx);
self.array_count = NonNegativeArrayIndex::ZERO;
debug!("Initialized array count to {}", self.array_count);

let wants_first_item = is_fallback_accepting
|| self
.automaton
.has_first_array_index_transition_to_accepting(self.state);

if wants_first_item {
self.next_event = classifier.next();

match self.next_event {
Some(Structural::Closing(_, close_idx)) => {
if let Some((next_idx, _)) =
self.bytes.seek_non_whitespace_forward(idx + 1)
{
if next_idx < close_idx {
result.report(next_idx);
}
}
}
Some(Structural::Comma(_)) => {
result.report(idx + 1);
}
_ => (),
}
Some(Structural::Comma(_)) => {
result.report(idx + 1);
}
_ => (),
}
} else {
classifier.turn_commas_off();
}
} else {
classifier.turn_commas_off();
self.is_list = false;
}

Expand Down Expand Up @@ -359,6 +431,12 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
if let Some(stack_frame) = self.stack.pop_if_at_or_below(*self.depth) {
self.state = stack_frame.state;
self.is_list = stack_frame.is_list;
self.array_count = stack_frame.array_count;
self.has_any_array_item_transition = stack_frame.has_any_array_item_transition;
self.has_any_array_item_transition_to_accepting =
stack_frame.has_any_array_item_transition_to_accepting;

debug!("Restored array count to {}", self.array_count);

if self.automaton.is_unitary(self.state) {
let bracket_type = self.current_node_bracket_type();
Expand All @@ -369,6 +447,7 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
}
}
}

#[cfg(not(feature = "unique-labels"))]
{
self.depth
Expand All @@ -378,13 +457,20 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
if let Some(stack_frame) = self.stack.pop_if_at_or_below(*self.depth) {
self.state = stack_frame.state;
self.is_list = stack_frame.is_list;
self.array_count = stack_frame.array_count;
self.has_any_array_item_transition = stack_frame.has_any_array_item_transition;
self.has_any_array_item_transition_to_accepting =
stack_frame.has_any_array_item_transition_to_accepting;

debug!("Restored array count to {}", self.array_count);
}
}

if self.is_list
&& self
&& (self
.automaton
.is_accepting(self.automaton[self.state].fallback_state())
|| self.has_any_array_item_transition)
{
classifier.turn_commas_on(idx);
} else {
Expand All @@ -402,15 +488,25 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {

fn transition_to(&mut self, target: State, opening: BracketType) {
let target_is_list = opening == BracketType::Square;
if target != self.state || target_is_list != self.is_list {

let fallback = self.automaton[self.state].fallback_state();
let is_fallback_accepting = self.automaton.is_accepting(fallback);
let searching_list = is_fallback_accepting || self.has_any_array_item_transition;

if target != self.state || target_is_list != self.is_list || searching_list {
debug!(
"push {}, goto {target}, is_list = {target_is_list}",
self.state
"push {}, goto {target}, is_list = {target_is_list}, array_count: {}",
self.state, self.array_count
);

self.stack.push(StackFrame {
depth: *self.depth,
state: self.state,
is_list: self.is_list,
array_count: self.array_count,
has_any_array_item_transition: self.has_any_array_item_transition,
has_any_array_item_transition_to_accepting: self
.has_any_array_item_transition_to_accepting,
});
self.state = target;
}
Expand Down Expand Up @@ -467,6 +563,9 @@ struct StackFrame {
depth: u8,
state: State,
is_list: bool,
array_count: NonNegativeArrayIndex,
has_any_array_item_transition: bool,
has_any_array_item_transition_to_accepting: bool,
}

#[derive(Debug)]
Expand Down
Loading

0 comments on commit 2b250d6

Please sign in to comment.