From 2b250d6971ae9c112b52e95878c18aeb2c4fafd3 Mon Sep 17 00:00:00 2001 From: zwerdlds Date: Mon, 22 May 2023 09:10:28 -0700 Subject: [PATCH] feat: index selector engine support (#132) - The automaton transition model has been changed to incorporate index-labelled transitions. - Both engines now support queries with the index selector. Ref: #132 --- .envrc | 1 + Justfile | 4 + crates/rsonpath-lib/src/engine.rs | 1 - .../rsonpath-lib/src/engine/head_skipping.rs | 18 +- crates/rsonpath-lib/src/engine/main.rs | 175 ++++++++++--- crates/rsonpath-lib/src/engine/recursive.rs | 106 ++++++-- crates/rsonpath-lib/src/query.rs | 113 ++------- crates/rsonpath-lib/src/query/automaton.rs | 178 ++++++++++++- .../src/query/automaton/minimizer.rs | 238 +++++++++++++----- .../rsonpath-lib/src/query/automaton/nfa.rs | 33 +-- .../rsonpath-lib/src/query/automaton/state.rs | 2 +- crates/rsonpath-lib/src/query/builder.rs | 22 +- crates/rsonpath-lib/src/query/error.rs | 5 +- .../src/query/nonnegative_array_index.rs | 104 ++++++++ crates/rsonpath-lib/src/query/parser.rs | 32 ++- .../tests/data/basic/array_root_nested.json | 23 ++ .../data/basic/array_root_populated.json | 1 + .../data/basic/array_root_singleton.json | 1 + .../tests/engine_correctness_tests.rs | 48 ++++ .../rsonpath-lib/tests/query_parser_tests.rs | 20 +- 20 files changed, 851 insertions(+), 274 deletions(-) create mode 100644 .envrc create mode 100644 crates/rsonpath-lib/src/query/nonnegative_array_index.rs create mode 100644 crates/rsonpath-lib/tests/data/basic/array_root_nested.json create mode 100644 crates/rsonpath-lib/tests/data/basic/array_root_populated.json create mode 100644 crates/rsonpath-lib/tests/data/basic/array_root_singleton.json diff --git a/.envrc b/.envrc new file mode 100644 index 00000000..4a4726a5 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use_nix diff --git a/Justfile b/Justfile index debbb80c..7f6dfae1 100644 --- a/Justfile +++ b/Justfile @@ -97,6 +97,10 @@ test-classifier: test-engine: cargo test 
--test engine_correctness_tests +# Run the query tests on default features. +test-parser: + cargo test --test query_parser_tests + # Run all tests, including real dataset tests, on the feature powerset of the project. test-full: -cargo install cargo-hack diff --git a/crates/rsonpath-lib/src/engine.rs b/crates/rsonpath-lib/src/engine.rs index fd09f42b..4401a62e 100644 --- a/crates/rsonpath-lib/src/engine.rs +++ b/crates/rsonpath-lib/src/engine.rs @@ -11,7 +11,6 @@ pub mod main; pub mod recursive; #[cfg(feature = "tail-skip")] mod tail_skipping; - pub use main::MainEngine as RsonpathEngine; use self::error::EngineError; diff --git a/crates/rsonpath-lib/src/engine/head_skipping.rs b/crates/rsonpath-lib/src/engine/head_skipping.rs index 477a1699..c4a59699 100644 --- a/crates/rsonpath-lib/src/engine/head_skipping.rs +++ b/crates/rsonpath-lib/src/engine/head_skipping.rs @@ -85,13 +85,17 @@ impl<'b, 'q, I: Input> HeadSkip<'b, 'q, I, BLOCK_SIZE> { if fallback_state == initial_state && transitions.len() == 1 { let (label, target_state) = transitions[0]; - debug!("Automaton starts with a descendant search, using memmem heuristic."); - return Some(Self { - bytes, - state: target_state, - is_accepting: automaton.is_accepting(target_state), - label, - }); + + if let Some(named_label) = label.get_label() { + debug!("Automaton starts with a descendant search, using memmem heuristic."); + + return Some(Self { + bytes, + state: target_state, + is_accepting: automaton.is_accepting(target_state), + label: named_label, + }); + } } None diff --git a/crates/rsonpath-lib/src/engine/main.rs b/crates/rsonpath-lib/src/engine/main.rs index e5503e45..48d5c95a 100644 --- a/crates/rsonpath-lib/src/engine/main.rs +++ b/crates/rsonpath-lib/src/engine/main.rs @@ -12,10 +12,6 @@ use super::head_skipping::{CanHeadSkip, HeadSkip}; use super::Compiler; #[cfg(feature = "head-skip")] use crate::classification::ResumeClassifierState; -use crate::classification::{ - quotes::{classify_quoted_sequences, 
QuoteClassifiedIterator}, - structural::{classify_structural_characters, BracketType, Structural, StructuralIterator}, -}; use crate::debug; use crate::engine::depth::Depth; use crate::engine::error::EngineError; @@ -24,9 +20,16 @@ use crate::engine::tail_skipping::TailSkip; use crate::engine::{Engine, Input}; use crate::query::automaton::{Automaton, State}; use crate::query::error::CompilerError; -use crate::query::{JsonPathQuery, Label}; +use crate::query::{JsonPathQuery, Label, NonNegativeArrayIndex}; use crate::result::QueryResult; use crate::BLOCK_SIZE; +use crate::{ + classification::{ + quotes::{classify_quoted_sequences, QuoteClassifiedIterator}, + structural::{classify_structural_characters, BracketType, Structural, StructuralIterator}, + }, + query::automaton::TransitionLabel, +}; use smallvec::{smallvec, SmallVec}; /// Main engine for a fixed JSONPath query. @@ -102,6 +105,9 @@ struct Executor<'q, 'b, I: Input> { bytes: &'b I, next_event: Option, is_list: bool, + array_count: NonNegativeArrayIndex, + has_any_array_item_transition: bool, + has_any_array_item_transition_to_accepting: bool, } fn query_executor<'q, 'b, I: Input>( @@ -116,6 +122,9 @@ fn query_executor<'q, 'b, I: Input>( bytes, next_event: None, is_list: false, + array_count: NonNegativeArrayIndex::ZERO, + has_any_array_item_transition: false, + has_any_array_item_transition_to_accepting: false, } } @@ -203,10 +212,15 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> { let mut any_matched = false; for &(label, target) in self.automaton[self.state].transitions() { - if self.automaton.is_accepting(target) && self.is_match(idx, label)? { - result.report(idx); - any_matched = true; - break; + match label { + TransitionLabel::ArrayIndex(_) => {} + TransitionLabel::ObjectMember(label) => { + if self.automaton.is_accepting(target) && self.is_match(idx, label)? 
{ + result.report(idx); + any_matched = true; + break; + } + } } } let fallback_state = self.automaton[self.state].fallback_state(); @@ -240,13 +254,32 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> { R: QueryResult, { self.next_event = classifier.next(); + let is_next_opening = self.next_event.map_or(false, |s| s.is_opening()); - if !is_next_opening { - let fallback_state = self.automaton[self.state].fallback_state(); - if self.is_list && self.automaton.is_accepting(fallback_state) { - result.report(idx); - } + let is_fallback_accepting = self + .automaton + .is_accepting(self.automaton[self.state].fallback_state()); + + if !is_next_opening && self.is_list && is_fallback_accepting { + debug!("Accepting on comma."); + result.report(idx); + } + + // After wildcard, check for a matching array index. + // If the index increment exceeds the field's limit, give up. + if self.is_list && self.array_count.try_increment().is_err() { + return Ok(()); + } + debug!("Incremented array count to {}", self.array_count); + + let match_index = self + .automaton + .has_array_index_transition_to_accepting(self.state, &self.array_count); + + if !is_next_opening && match_index { + debug!("Accepting on list item."); + result.report(idx); } Ok(()) @@ -267,15 +300,32 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> { debug!("Opening {bracket_type:?}, increasing depth and pushing stack.",); let mut any_matched = false; - if let Some(colon_idx) = self.find_preceding_colon(idx) { - for &(label, target) in self.automaton[self.state].transitions() { - if self.is_match(colon_idx, label)? 
{ - any_matched = true; - self.transition_to(target, bracket_type); - if self.automaton.is_accepting(target) { - result.report(colon_idx); + let colon_idx = self.find_preceding_colon(idx); + + for &(label, target) in self.automaton[self.state].transitions() { + match label { + TransitionLabel::ArrayIndex(i) => { + if self.is_list && i.eq(&self.array_count) { + any_matched = true; + self.transition_to(target, bracket_type); + if self.automaton.is_accepting(target) { + debug!("Accept {idx}"); + result.report(idx); + } + break; + } + } + TransitionLabel::ObjectMember(label) => { + if let Some(colon_idx) = colon_idx { + if self.is_match(colon_idx, label)? { + any_matched = true; + self.transition_to(target, bracket_type); + if self.automaton.is_accepting(target) { + result.report(colon_idx); + } + break; + } } - break; } } } @@ -301,29 +351,51 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> { if bracket_type == BracketType::Square { self.is_list = true; + self.has_any_array_item_transition = + self.automaton.has_any_array_item_transition(self.state); + self.has_any_array_item_transition_to_accepting = self + .automaton + .has_any_array_item_transition_to_accepting(self.state); let fallback = self.automaton[self.state].fallback_state(); - if self.automaton.is_accepting(fallback) { + let is_fallback_accepting = self.automaton.is_accepting(fallback); + + let searching_list = is_fallback_accepting || self.has_any_array_item_transition; + + if searching_list { classifier.turn_commas_on(idx); - self.next_event = classifier.next(); - match self.next_event { - Some(Structural::Closing(_, close_idx)) => { - if let Some((next_idx, _)) = self.bytes.seek_non_whitespace_forward(idx + 1) - { - if next_idx < close_idx { - result.report(next_idx); + self.array_count = NonNegativeArrayIndex::ZERO; + debug!("Initialized array count to {}", self.array_count); + + let wants_first_item = is_fallback_accepting + || self + .automaton + 
.has_first_array_index_transition_to_accepting(self.state); + + if wants_first_item { + self.next_event = classifier.next(); + + match self.next_event { + Some(Structural::Closing(_, close_idx)) => { + if let Some((next_idx, _)) = + self.bytes.seek_non_whitespace_forward(idx + 1) + { + if next_idx < close_idx { + result.report(next_idx); + } } } + Some(Structural::Comma(_)) => { + result.report(idx + 1); + } + _ => (), } - Some(Structural::Comma(_)) => { - result.report(idx + 1); - } - _ => (), } } else { classifier.turn_commas_off(); } } else { + classifier.turn_commas_off(); self.is_list = false; } @@ -359,6 +431,12 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> { if let Some(stack_frame) = self.stack.pop_if_at_or_below(*self.depth) { self.state = stack_frame.state; self.is_list = stack_frame.is_list; + self.array_count = stack_frame.array_count; + self.has_any_array_item_transition = stack_frame.has_any_array_item_transition; + self.has_any_array_item_transition_to_accepting = + stack_frame.has_any_array_item_transition_to_accepting; + + debug!("Restored array count to {}", self.array_count); if self.automaton.is_unitary(self.state) { let bracket_type = self.current_node_bracket_type(); @@ -369,6 +447,7 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> { } } } + #[cfg(not(feature = "unique-labels"))] { self.depth @@ -378,13 +457,20 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> { if let Some(stack_frame) = self.stack.pop_if_at_or_below(*self.depth) { self.state = stack_frame.state; self.is_list = stack_frame.is_list; + self.array_count = stack_frame.array_count; + self.has_any_array_item_transition = stack_frame.has_any_array_item_transition; + self.has_any_array_item_transition_to_accepting = + stack_frame.has_any_array_item_transition_to_accepting; + + debug!("Restored array count to {}", self.array_count); } } if self.is_list - && self + && (self .automaton .is_accepting(self.automaton[self.state].fallback_state()) + || self.has_any_array_item_transition) { 
classifier.turn_commas_on(idx); } else { @@ -402,15 +488,25 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> { fn transition_to(&mut self, target: State, opening: BracketType) { let target_is_list = opening == BracketType::Square; - if target != self.state || target_is_list != self.is_list { + + let fallback = self.automaton[self.state].fallback_state(); + let is_fallback_accepting = self.automaton.is_accepting(fallback); + let searching_list = is_fallback_accepting || self.has_any_array_item_transition; + + if target != self.state || target_is_list != self.is_list || searching_list { debug!( - "push {}, goto {target}, is_list = {target_is_list}", - self.state + "push {}, goto {target}, is_list = {target_is_list}, array_count: {}", + self.state, self.array_count ); + self.stack.push(StackFrame { depth: *self.depth, state: self.state, is_list: self.is_list, + array_count: self.array_count, + has_any_array_item_transition: self.has_any_array_item_transition, + has_any_array_item_transition_to_accepting: self + .has_any_array_item_transition_to_accepting, }); self.state = target; } @@ -467,6 +563,9 @@ struct StackFrame { depth: u8, state: State, is_list: bool, + array_count: NonNegativeArrayIndex, + has_any_array_item_transition: bool, + has_any_array_item_transition_to_accepting: bool, } #[derive(Debug)] diff --git a/crates/rsonpath-lib/src/engine/recursive.rs b/crates/rsonpath-lib/src/engine/recursive.rs index 06f1f74c..264a465c 100644 --- a/crates/rsonpath-lib/src/engine/recursive.rs +++ b/crates/rsonpath-lib/src/engine/recursive.rs @@ -1,12 +1,14 @@ //! Reference implementation of a JSONPath query engine with recursive descent. 
#[cfg(feature = "head-skip")] use super::head_skipping::{CanHeadSkip, HeadSkip}; +use crate::classification::quotes::classify_quoted_sequences; +use crate::classification::quotes::QuoteClassifiedIterator; +use crate::classification::structural::classify_structural_characters; +use crate::classification::structural::BracketType; +use crate::classification::structural::Structural; +use crate::classification::structural::StructuralIterator; #[cfg(feature = "head-skip")] use crate::classification::ResumeClassifierState; -use crate::classification::{ - quotes::{classify_quoted_sequences, QuoteClassifiedIterator}, - structural::{classify_structural_characters, BracketType, Structural, StructuralIterator}, -}; use crate::debug; use crate::engine::error::EngineError; #[cfg(feature = "tail-skip")] @@ -15,9 +17,9 @@ use crate::engine::{Compiler, Engine}; #[cfg(feature = "head-skip")] use crate::error::InternalRsonpathError; use crate::input::Input; -use crate::query::automaton::{Automaton, State}; -use crate::query::error::CompilerError; -use crate::query::{JsonPathQuery, Label}; +use crate::query::automaton::{Automaton, State, TransitionLabel}; +use crate::query::error::{ArrayIndexError, CompilerError}; +use crate::query::{JsonPathQuery, Label, NonNegativeArrayIndex}; use crate::result::QueryResult; use crate::BLOCK_SIZE; @@ -171,9 +173,18 @@ impl<'q, 'b, I: Input> ExecutionContext<'q, 'b, I> { let fallback_state = self.automaton[state].fallback_state(); let is_fallback_accepting = self.automaton.is_accepting(fallback_state); let is_list = bracket_type == BracketType::Square; - let needs_commas = is_list && is_fallback_accepting; + + let searching_list = self.automaton.has_any_array_item_transition(state); + + let is_accepting_list_item = is_list + && self + .automaton + .has_any_array_item_transition_to_accepting(state); + let needs_commas = is_list && (is_fallback_accepting || searching_list); let needs_colons = !is_list && 
self.automaton.has_transition_to_accepting(state); + let mut array_count = NonNegativeArrayIndex::ZERO; + let config_characters = |classifier: &mut Classifier!(), idx: usize| { if needs_commas { classifier.turn_commas_on(idx); @@ -190,7 +201,15 @@ impl<'q, 'b, I: Input> ExecutionContext<'q, 'b, I> { config_characters(classifier, open_idx); - if needs_commas { + // When a list contains only one item, this block ensures that the list item is reported if appropriate without entering the loop below. + let wants_first_item = self.automaton[state].transitions().iter().any(|t| match t { + (TransitionLabel::ArrayIndex(i), s) if i.eq(&NonNegativeArrayIndex::ZERO) => { + self.automaton.is_accepting(*s) + } + _ => false, + }) || is_fallback_accepting; + + if is_list && wants_first_item { next_event = classifier.next(); if let Some(Structural::Closing(_, close_idx)) = next_event { if let Some((next_idx, _)) = self.bytes.seek_non_whitespace_forward(open_idx + 1) { @@ -216,12 +235,31 @@ impl<'q, 'b, I: Input> ExecutionContext<'q, 'b, I> { Some(Structural::Comma(idx)) => { latest_idx = idx; next_event = classifier.next(); + let is_next_opening = next_event.map_or(false, |s| s.is_opening()); if !is_next_opening && is_list && is_fallback_accepting { debug!("Accepting on comma."); result.report(idx); } + + // Once we are in comma search, we have already considered the option that the first item in the list is a match. Iterate on the remaining items. 
+ + if let Err(ArrayIndexError::ExceedsUpperLimitError(_)) = + array_count.try_increment() + { + debug!("Exceeded possible array match in content."); + continue; + } + + let match_index = self + .automaton + .has_array_index_transition_to_accepting(state, &array_count); + + if is_accepting_list_item && !is_next_opening && match_index { + debug!("Accepting on list item."); + result.report(idx); + } } Some(Structural::Colon(idx)) => { debug!("Colon"); @@ -234,11 +272,17 @@ impl<'q, 'b, I: Input> ExecutionContext<'q, 'b, I> { let mut any_matched = false; for &(label, target) in self.automaton[state].transitions() { - if self.automaton.is_accepting(target) && self.is_match(idx, label)? { - debug!("Accept {idx}"); - result.report(idx); - any_matched = true; - break; + match label { + TransitionLabel::ObjectMember(label) + if self.automaton.is_accepting(target) + && self.is_match(idx, label)? => + { + debug!("Accept {idx}"); + result.report(idx); + any_matched = true; + break; + } + _ => {} } } let fallback_state = self.automaton[state].fallback_state(); @@ -270,16 +314,32 @@ impl<'q, 'b, I: Input> ExecutionContext<'q, 'b, I> { .seek_non_whitespace_backward(idx - 1) .and_then(|(char_idx, char)| (char == b':').then_some(char_idx)); - if let Some(colon_idx) = colon_idx { - debug!("Colon backtracked"); - for &(label, target) in self.automaton[state].transitions() { - if self.is_match(colon_idx, label)? { - matched = Some(target); - if self.automaton.is_accepting(target) { - debug!("Accept {idx}"); - result.report(colon_idx); + for &(label, target) in self.automaton[state].transitions() { + match label { + TransitionLabel::ObjectMember(l) => { + if let Some(colon_idx) = colon_idx { + debug!("Colon backtracked"); + if self.is_match(colon_idx, l)? 
{ + matched = Some(target); + if self.automaton.is_accepting(target) { + debug!("Accept Object Member {}", l.display()); + debug!("Accept {idx}"); + result.report(colon_idx); + } + break; + } + } + } + TransitionLabel::ArrayIndex(i) => { + if is_list && i.eq(&array_count) { + matched = Some(target); + if self.automaton.is_accepting(target) { + debug!("Accept Array Index {i}"); + debug!("Accept {idx}"); + result.report(idx); + } + break; } - break; } } } diff --git a/crates/rsonpath-lib/src/query.rs b/crates/rsonpath-lib/src/query.rs index d05829b9..5e5624f5 100644 --- a/crates/rsonpath-lib/src/query.rs +++ b/crates/rsonpath-lib/src/query.rs @@ -34,45 +34,14 @@ pub mod automaton; pub mod builder; pub mod error; mod label; +mod nonnegative_array_index; mod parser; pub use label::Label; +pub use nonnegative_array_index::NonNegativeArrayIndex; use log::*; use std::fmt::{self, Display}; -/// Provides the [IETF-conforming index value](https://www.rfc-editor.org/rfc/rfc7493.html#section-2). Values are \[0, (2^53)-1]. -#[derive(Clone, Copy, PartialEq, Eq, Debug)] -pub struct NonNegativeArrayIndex(u64); - -/// The upper inclusive bound on index values. -pub const ARRAY_INDEX_ULIMIT: u64 = (1 << 53) - 1; -impl TryFrom for NonNegativeArrayIndex { - type Error = ArrayIndexError; - - #[inline] - fn try_from(value: u64) -> Result { - if value > ARRAY_INDEX_ULIMIT { - Err(ArrayIndexError::ExceedsUpperLimitError(value.to_string())) - } else { - Ok(Self(value)) - } - } -} - -impl From for u64 { - #[inline(always)] - fn from(val: NonNegativeArrayIndex) -> Self { - val.0 - } -} - -impl Display for NonNegativeArrayIndex { - #[inline] - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{index}", index = self.0) - } -} - /// Linked list structure of a JSONPath query. #[derive(Debug, PartialEq, Eq)] pub enum JsonPathQueryNode { @@ -87,12 +56,14 @@ pub enum JsonPathQueryNode { /// Represents recursive descendant with a wildcard ('`..*`' tokens). 
AnyDescendant(Option>), /// Represents direct descendant list item with a positive index (numbers). - ArrayIndex(NonNegativeArrayIndex, Option>), + ArrayIndexChild(NonNegativeArrayIndex, Option>), + /// Represents recursive descendant with an array index ('`..[n]`' tokens). + ArrayIndexDescendant(NonNegativeArrayIndex, Option>), } use JsonPathQueryNode::*; -use self::error::{ArrayIndexError, ParserError}; +use self::error::ParserError; impl JsonPathQueryNode { /// Retrieve the child of the node or `None` if it is the last one @@ -106,7 +77,8 @@ impl JsonPathQueryNode { | AnyChild(node) | Descendant(_, node) | AnyDescendant(node) - | ArrayIndex(_, node) => node.as_deref(), + | ArrayIndexChild(_, node) + | ArrayIndexDescendant(_, node) => node.as_deref(), } } @@ -203,7 +175,8 @@ impl Display for JsonPathQueryNode { AnyChild(_) => write!(f, "[*]"), Descendant(label, _) => write!(f, "..['{}']", label.display()), AnyDescendant(_) => write!(f, "..[*]"), - ArrayIndex(i, _) => write!(f, "[{i}]"), + ArrayIndexChild(i, _) => write!(f, "[{i}]"), + ArrayIndexDescendant(i, _) => write!(f, "..[{i}]"), }?; if let Some(child) = self.child() { @@ -236,7 +209,7 @@ pub trait JsonPathQueryNodeType { /// returns the label it represents; otherwise, `None`. fn label(&self) -> Option<&Label>; - /// If the type is [`JsonPathQueryNode::ArrayIndex`] + /// If the type is [`JsonPathQueryNode::ArrayIndexDescendant`] or [`JsonPathQueryNode::ArrayIndexChild`] /// returns the index it represents; otherwise, `None`. 
fn array_index(&self) -> Option<&NonNegativeArrayIndex>; } @@ -271,14 +244,18 @@ impl JsonPathQueryNodeType for JsonPathQueryNode { fn label(&self) -> Option<&Label> { match self { Child(label, _) | Descendant(label, _) => Some(label), - Root(_) | AnyChild(_) | AnyDescendant(_) | ArrayIndex(_, _) => None, + Root(_) + | AnyChild(_) + | AnyDescendant(_) + | ArrayIndexChild(_, _) + | ArrayIndexDescendant(_, _) => None, } } #[inline(always)] fn array_index(&self) -> Option<&NonNegativeArrayIndex> { match self { - ArrayIndex(i, _) => Some(i), + ArrayIndexChild(i, _) | ArrayIndexDescendant(i, _) => Some(i), Child(_, _) | Descendant(_, _) | Root(_) | AnyChild(_) | AnyDescendant(_) => None, } } @@ -324,59 +301,3 @@ impl> JsonPathQueryNodeType for O self.as_ref().and_then(|x| x.array_index()) } } - -#[cfg(test)] -mod tests { - use std::{ - collections::hash_map::DefaultHasher, - hash::{Hash, Hasher}, - }; - - use super::*; - - #[test] - fn label_equality() { - let label1 = Label::new("dog"); - let label2 = Label::new("dog"); - - assert_eq!(label1, label2); - } - - #[test] - fn label_inequality() { - let label1 = Label::new("dog"); - let label2 = Label::new("doc"); - - assert_ne!(label1, label2); - } - - #[test] - fn label_hash() { - let label1 = Label::new("dog"); - let label2 = Label::new("dog"); - - let mut s1 = DefaultHasher::new(); - label1.hash(&mut s1); - let h1 = s1.finish(); - - let mut s2 = DefaultHasher::new(); - label2.hash(&mut s2); - let h2 = s2.finish(); - - assert_eq!(h1, h2); - } - - #[test] - fn index_ulimit_sanity_check() { - assert_eq!(9_007_199_254_740_991, ARRAY_INDEX_ULIMIT); - } - - #[test] - fn index_ulimit_parse_check() { - NonNegativeArrayIndex::try_from(ARRAY_INDEX_ULIMIT) - .expect("Array index ulimit should be convertible."); - - NonNegativeArrayIndex::try_from(ARRAY_INDEX_ULIMIT + 1) - .expect_err("Values in excess of array index ulimit should not be convertible."); - } -} diff --git a/crates/rsonpath-lib/src/query/automaton.rs 
b/crates/rsonpath-lib/src/query/automaton.rs index c5606c21..771e82a0 100644 --- a/crates/rsonpath-lib/src/query/automaton.rs +++ b/crates/rsonpath-lib/src/query/automaton.rs @@ -6,11 +6,11 @@ mod state; pub use state::{State, StateAttributes}; -use super::{error::CompilerError, JsonPathQuery, Label}; +use super::{error::CompilerError, JsonPathQuery, Label, NonNegativeArrayIndex}; use crate::debug; use nfa::NondeterministicAutomaton; use smallvec::SmallVec; -use std::{fmt::Display, ops::Index}; +use std::{borrow::Borrow, fmt::Display, ops::Index}; /// A minimal, deterministic automaton representing a JSONPath query. #[derive(Debug, PartialEq, Eq)] @@ -18,8 +18,79 @@ pub struct Automaton<'q> { states: Vec>, } +/// Represent the distinct methods of moving on a match between states. +#[derive(Debug, Copy, PartialEq, Clone, Eq)] +pub enum TransitionLabel<'q> { + /// Transition when a JSON member name matches a [`Label`]i. + ObjectMember(&'q Label), + /// Transition on the n-th element of an array, with n specified by a [`NonNegativeArrayIndex`]. + ArrayIndex(NonNegativeArrayIndex), +} + +impl<'q> TransitionLabel<'q> { + ///Return the textual [`Label`] being wrapped if so. Returns [`None`] otherwise. + #[must_use] + #[inline(always)] + pub fn get_label(&self) -> Option<&'q Label> { + match self { + TransitionLabel::ObjectMember(l) => Some(l), + TransitionLabel::ArrayIndex(_) => None, + } + } + + ///Return the textual [`Label`] being wrapped if so. Returns [`None`] otherwise. + #[must_use] + #[inline(always)] + pub fn get_array_index(&'q self) -> Option<&'q NonNegativeArrayIndex> { + match self { + TransitionLabel::ArrayIndex(l) => Some(l), + TransitionLabel::ObjectMember(_) => None, + } + } + + /// Wraps a [`Label`] in a [`TransitionLabel`]. + #[must_use] + #[inline(always)] + pub fn new_object_member(label: &'q Label) -> Self { + TransitionLabel::ObjectMember(label) + } + + /// Wraps a [`NonNegativeArrayIndex`] in a [`TransitionLabel`]. 
+ #[must_use] + #[inline(always)] + pub fn new_array_index(label: NonNegativeArrayIndex) -> Self { + TransitionLabel::ArrayIndex(label) + } +} + +impl<'q> From<&'q Label> for TransitionLabel<'q> { + #[must_use] + #[inline(always)] + fn from(label: &'q Label) -> Self { + TransitionLabel::new_object_member(label) + } +} + +impl Display for TransitionLabel<'_> { + #[inline(always)] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + TransitionLabel::ObjectMember(label) => write!(f, "{}", label.display()), + TransitionLabel::ArrayIndex(index) => write!(f, "{}", index.get_index()), + } + } +} + +impl> From for TransitionLabel<'_> { + #[must_use] + #[inline(always)] + fn from(label: T) -> Self { + TransitionLabel::new_array_index(*label.borrow()) + } +} + /// A single transition of an [`Automaton`]. -type Transition<'q> = (&'q Label, State); +type Transition<'q> = (TransitionLabel<'q>, State); /// A transition table of a single [`State`] of an [`Automaton`]. /// @@ -156,6 +227,103 @@ impl<'q> Automaton<'q> { self[state].attributes.is_accepting() } + /// Returns whether the given state transitions to any list. + /// + /// # Example + /// ```rust + /// # use rsonpath_lib::query::*; + /// # use rsonpath_lib::query::automaton::*; + /// let query = JsonPathQuery::parse("$[2]").unwrap(); + /// let automaton = Automaton::new(&query).unwrap(); + /// let state = automaton.initial_state(); + /// + /// assert!(automaton.has_any_array_item_transition(state)); + /// ``` + #[must_use] + #[inline(always)] + pub fn has_any_array_item_transition(&self, state: State) -> bool { + self[state] + .transitions() + .iter() + .any(|t| matches!(t, (TransitionLabel::ArrayIndex(_), _))) + } + + /// Returns whether the given state is accepting an item in a list. 
+ /// + /// # Example + /// ```rust + /// # use rsonpath_lib::query::*; + /// # use rsonpath_lib::query::automaton::*; + /// let query = JsonPathQuery::parse("$[2]").unwrap(); + /// let automaton = Automaton::new(&query).unwrap(); + /// let state = automaton.initial_state(); + /// + /// assert!(automaton.has_any_array_item_transition_to_accepting(state)); + /// ``` + #[must_use] + #[inline(always)] + pub fn has_any_array_item_transition_to_accepting(&self, state: State) -> bool { + self[state].transitions().iter().any(|t| match t { + (TransitionLabel::ArrayIndex(_), s) => self.is_accepting(*s), + _ => false, + }) + } + + /// Returns whether the given state is accepting the first item in a list. + /// + /// # Example + /// ```rust + /// # use rsonpath_lib::query::*; + /// # use rsonpath_lib::query::automaton::*; + /// let query = JsonPathQuery::parse("$[0]").unwrap(); + /// let automaton = Automaton::new(&query).unwrap(); + /// let state = automaton.initial_state(); + /// + /// assert!(automaton.has_first_array_index_transition_to_accepting(state)); + /// ``` + /// ```rust + /// # use rsonpath_lib::query::*; + /// # use rsonpath_lib::query::automaton::*; + /// let query = JsonPathQuery::parse("$[1]").unwrap(); + /// let automaton = Automaton::new(&query).unwrap(); + /// let state = automaton.initial_state(); + /// + /// assert!(!automaton.has_first_array_index_transition_to_accepting(state)); + /// ``` + #[must_use] + #[inline(always)] + pub fn has_first_array_index_transition_to_accepting(&self, state: State) -> bool { + self.has_array_index_transition_to_accepting(state, &NonNegativeArrayIndex::ZERO) + } + + /// Returns whether the given state is accepting the item at a given index in a list. 
+ /// + /// # Example + /// ```rust + /// # use rsonpath_lib::query::*; + /// # use rsonpath_lib::query::automaton::*; + /// let query = JsonPathQuery::parse("$[1]").unwrap(); + /// let automaton = Automaton::new(&query).unwrap(); + /// let state = automaton.initial_state(); + /// let match_index_1 = NonNegativeArrayIndex::new(1); + /// let match_index_2 = NonNegativeArrayIndex::new(2); + /// + /// assert!(automaton.has_array_index_transition_to_accepting(state, &match_index_1)); + /// assert!(!automaton.has_array_index_transition_to_accepting(state, &match_index_2)); + /// ``` + #[must_use] + #[inline(always)] + pub fn has_array_index_transition_to_accepting( + &self, + state: State, + match_index: &NonNegativeArrayIndex, + ) -> bool { + self[state].transitions().iter().any(|t| match t { + (TransitionLabel::ArrayIndex(i), s) => i.eq(match_index) && self.is_accepting(*s), + _ => false, + }) + } + /// Returns whether the given state has any transitions /// (labelled or fallback) to an accepting state. /// @@ -232,7 +400,7 @@ impl<'q> StateTable<'q> { /// Returns the collection of labelled transitions from this state. /// - /// A transition is triggered if the [`Label`] is matched and leads + /// A transition is triggered if the [`TransitionLabel`] is matched and leads /// to the contained [`State`]. #[must_use] #[inline(always)] @@ -279,7 +447,7 @@ impl<'q> Display for Automaton<'q> { for (i, transitions) in self.states.iter().enumerate() { for (label, state) in transitions.transitions.iter() { - writeln!(f, " {i} -> {} [label=\"{}\"]", state.0, label.display(),)? + writeln!(f, " {i} -> {} [label=\"{}\"]", state.0, label,)? 
} writeln!(f, " {i} -> {} [label=\"*\"]", transitions.fallback_state.0)?; } diff --git a/crates/rsonpath-lib/src/query/automaton/minimizer.rs b/crates/rsonpath-lib/src/query/automaton/minimizer.rs index 43df415d..e9602d91 100644 --- a/crates/rsonpath-lib/src/query/automaton/minimizer.rs +++ b/crates/rsonpath-lib/src/query/automaton/minimizer.rs @@ -5,8 +5,10 @@ use super::nfa::{self, NfaState, NfaStateId}; use super::small_set::{SmallSet, SmallSet256}; use super::state::StateAttributesBuilder; -use super::{Automaton, NondeterministicAutomaton, State as DfaStateId, StateTable}; -use super::{Label, StateAttributes}; +use super::{ + Automaton, NondeterministicAutomaton, State as DfaStateId, StateAttributes, StateTable, + TransitionLabel, +}; use crate::debug; use crate::query::error::CompilerError; use smallvec::{smallvec, SmallVec}; @@ -14,7 +16,7 @@ use vector_map::VecMap; /// Turn the [`NondeterministicAutomaton`] to an equivalent minimal* deterministic [`Automaton`]. /// -/// * Not actualy minimal. See #91 +/// *Not actually minimal. 
See #91 pub(super) fn minimize(nfa: NondeterministicAutomaton) -> Result { let minimizer = Minimizer { nfa, @@ -45,7 +47,7 @@ pub(super) struct Minimizer<'q> { #[derive(Debug)] struct SuperstateTransitionTable<'q> { - labelled: VecMap<&'q Label, SmallSet256>, + labelled: VecMap, SmallSet256>, wildcard: SmallSet256, } @@ -166,7 +168,7 @@ impl<'q> Minimizer<'q> { fn build_attributes( &self, id: DfaStateId, - transitions: &[(&Label, DfaStateId)], + transitions: &[(TransitionLabel, DfaStateId)], fallback: DfaStateId, ) -> StateAttributes { let mut attrs = StateAttributesBuilder::new(); @@ -255,7 +257,7 @@ impl<'q> Minimizer<'q> { | NfaState::Recursive(nfa::Transition::Labelled(label)) => { debug!( "Considering transition {nfa_state} --{}-> {}", - label.display(), + label, nfa_state.next()?, ); // Add the target NFA state to the target superstate, or create a singleton @@ -395,6 +397,43 @@ mod tests { assert_eq!(result, expected); } + #[test] + fn simple_nonnegative_indexed_test() { + // Query = $[0] + let label = TransitionLabel::ArrayIndex(0.try_into().unwrap()); + + let nfa = NondeterministicAutomaton { + ordered_states: vec![ + NfaState::Direct(nfa::Transition::Labelled(label)), + NfaState::Accepting, + ], + }; + + let result = minimize(nfa).unwrap(); + let expected = Automaton { + states: vec![ + StateTable { + transitions: smallvec![], + fallback_state: State(0), + attributes: StateAttributes::REJECTING, + }, + StateTable { + transitions: smallvec![(label, State(2))], + fallback_state: State(0), + attributes: StateAttributes::UNITARY + | StateAttributes::TRANSITIONS_TO_ACCEPTING, + }, + StateTable { + transitions: smallvec![], + fallback_state: State(0), + attributes: StateAttributes::ACCEPTING, + }, + ], + }; + + assert_eq!(result, expected); + } + #[test] fn simple_descendant_wildcard_test() { // Query = $..* @@ -434,14 +473,18 @@ mod tests { fn interstitial_descendant_wildcard_test() { // Query = $..a.b..*.a..b let label_a = Label::new("a"); + let label_a = 
(&label_a).into(); + let label_b = Label::new("b"); + let label_b = (&label_b).into(); + let nfa = NondeterministicAutomaton { ordered_states: vec![ - NfaState::Recursive(nfa::Transition::Labelled(&label_a)), - NfaState::Direct(nfa::Transition::Labelled(&label_b)), + NfaState::Recursive(nfa::Transition::Labelled(label_a)), + NfaState::Direct(nfa::Transition::Labelled(label_b)), NfaState::Recursive(nfa::Transition::Wildcard), - NfaState::Direct(nfa::Transition::Labelled(&label_a)), - NfaState::Recursive(nfa::Transition::Labelled(&label_b)), + NfaState::Direct(nfa::Transition::Labelled(label_a)), + NfaState::Recursive(nfa::Transition::Labelled(label_b)), NfaState::Accepting, ], }; @@ -455,12 +498,12 @@ mod tests { attributes: StateAttributes::REJECTING, }, StateTable { - transitions: smallvec![(&label_a, State(2))], + transitions: smallvec![(label_a, State(2))], fallback_state: State(1), attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label_a, State(2)), (&label_b, State(3))], + transitions: smallvec![(label_a, State(2)), (label_b, State(3))], fallback_state: State(1), attributes: StateAttributes::EMPTY, }, @@ -470,17 +513,17 @@ mod tests { attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label_a, State(5))], + transitions: smallvec![(label_a, State(5))], fallback_state: State(4), attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label_b, State(6))], + transitions: smallvec![(label_b, State(6))], fallback_state: State(5), attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { - transitions: smallvec![(&label_b, State(6))], + transitions: smallvec![(label_b, State(6))], fallback_state: State(5), attributes: StateAttributes::ACCEPTING | StateAttributes::TRANSITIONS_TO_ACCEPTING, @@ -495,14 +538,18 @@ mod tests { fn interstitial_nondescendant_wildcard_test() { // Query = $..a.b.*.a..b let label_a = Label::new("a"); + let label_a = (&label_a).into(); + let label_b = 
Label::new("b"); + let label_b = (&label_b).into(); + let nfa = NondeterministicAutomaton { ordered_states: vec![ - NfaState::Recursive(nfa::Transition::Labelled(&label_a)), - NfaState::Direct(nfa::Transition::Labelled(&label_b)), + NfaState::Recursive(nfa::Transition::Labelled(label_a)), + NfaState::Direct(nfa::Transition::Labelled(label_b)), NfaState::Direct(nfa::Transition::Wildcard), - NfaState::Direct(nfa::Transition::Labelled(&label_a)), - NfaState::Recursive(nfa::Transition::Labelled(&label_b)), + NfaState::Direct(nfa::Transition::Labelled(label_a)), + NfaState::Recursive(nfa::Transition::Labelled(label_b)), NfaState::Accepting, ], }; @@ -516,37 +563,37 @@ mod tests { attributes: StateAttributes::REJECTING, }, StateTable { - transitions: smallvec![(&label_a, State(2))], + transitions: smallvec![(label_a, State(2))], fallback_state: State(1), attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label_a, State(2)), (&label_b, State(3))], + transitions: smallvec![(label_a, State(2)), (label_b, State(3))], fallback_state: State(1), attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label_a, State(5))], + transitions: smallvec![(label_a, State(5))], fallback_state: State(4), attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label_a, State(6))], + transitions: smallvec![(label_a, State(6))], fallback_state: State(1), attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label_a, State(6)), (&label_b, State(3))], + transitions: smallvec![(label_a, State(6)), (label_b, State(3))], fallback_state: State(1), attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label_b, State(7))], + transitions: smallvec![(label_b, State(7))], fallback_state: State(6), attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { - transitions: smallvec![(&label_b, State(7))], + transitions: smallvec![(label_b, State(7))], fallback_state: 
State(6), attributes: StateAttributes::ACCEPTING | StateAttributes::TRANSITIONS_TO_ACCEPTING, @@ -561,9 +608,11 @@ mod tests { fn simple_multi_accepting_test() { // Query = $..a.* let label = Label::new("a"); + let label = (&label).into(); + let nfa = NondeterministicAutomaton { ordered_states: vec![ - NfaState::Recursive(nfa::Transition::Labelled(&label)), + NfaState::Recursive(nfa::Transition::Labelled(label)), NfaState::Direct(nfa::Transition::Wildcard), NfaState::Accepting, ], @@ -578,22 +627,22 @@ mod tests { attributes: StateAttributes::REJECTING, }, StateTable { - transitions: smallvec![(&label, State(2)),], + transitions: smallvec![(label, State(2)),], fallback_state: State(1), attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label, State(4))], + transitions: smallvec![(label, State(4))], fallback_state: State(3), attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { - transitions: smallvec![(&label, State(2))], + transitions: smallvec![(label, State(2))], fallback_state: State(1), attributes: StateAttributes::ACCEPTING, }, StateTable { - transitions: smallvec![(&label, State(4))], + transitions: smallvec![(label, State(4))], fallback_state: State(3), attributes: StateAttributes::ACCEPTING | StateAttributes::TRANSITIONS_TO_ACCEPTING, @@ -604,13 +653,52 @@ mod tests { assert_eq!(result, expected); } + #[test] + fn simple_multi_accepting_nneg_index_test() { + // Query = $..[0] + let label = TransitionLabel::ArrayIndex(0.try_into().unwrap()); + + let nfa = NondeterministicAutomaton { + ordered_states: vec![ + NfaState::Recursive(nfa::Transition::Labelled(label)), + NfaState::Accepting, + ], + }; + + let result = minimize(nfa).unwrap(); + let expected = Automaton { + states: vec![ + StateTable { + transitions: smallvec![], + fallback_state: State(0), + attributes: StateAttributes::REJECTING, + }, + StateTable { + transitions: smallvec![(label, State(2)),], + fallback_state: State(1), + attributes: 
StateAttributes::TRANSITIONS_TO_ACCEPTING, + }, + StateTable { + transitions: smallvec![(label, State(2))], + fallback_state: State(1), + attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING + | StateAttributes::ACCEPTING, + }, + ], + }; + + assert_eq!(result, expected); + } + #[test] fn chained_wildcard_children_test() { // Query = $.a.*.*.* let label = Label::new("a"); + let label = (&label).into(); + let nfa = NondeterministicAutomaton { ordered_states: vec![ - NfaState::Direct(nfa::Transition::Labelled(&label)), + NfaState::Direct(nfa::Transition::Labelled(label)), NfaState::Direct(nfa::Transition::Wildcard), NfaState::Direct(nfa::Transition::Wildcard), NfaState::Direct(nfa::Transition::Wildcard), @@ -627,7 +715,7 @@ mod tests { attributes: StateAttributes::REJECTING, }, StateTable { - transitions: smallvec![(&label, State(2))], + transitions: smallvec![(label, State(2))], fallback_state: State(0), attributes: StateAttributes::UNITARY, }, @@ -661,9 +749,11 @@ mod tests { fn chained_wildcard_children_after_descendant_test() { // Query = $..a.*.* let label = Label::new("a"); + let label = (&label).into(); + let nfa = NondeterministicAutomaton { ordered_states: vec![ - NfaState::Recursive(nfa::Transition::Labelled(&label)), + NfaState::Recursive(nfa::Transition::Labelled(label)), NfaState::Direct(nfa::Transition::Wildcard), NfaState::Direct(nfa::Transition::Wildcard), NfaState::Accepting, @@ -679,44 +769,44 @@ mod tests { attributes: StateAttributes::REJECTING, }, StateTable { - transitions: smallvec![(&label, State(2))], + transitions: smallvec![(label, State(2))], fallback_state: State(1), attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label, State(4))], + transitions: smallvec![(label, State(4))], fallback_state: State(3), attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label, State(8))], + transitions: smallvec![(label, State(8))], fallback_state: State(7), attributes: StateAttributes::EMPTY | 
StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { - transitions: smallvec![(&label, State(6))], + transitions: smallvec![(label, State(6))], fallback_state: State(5), attributes: StateAttributes::EMPTY | StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { - transitions: smallvec![(&label, State(8))], + transitions: smallvec![(label, State(8))], fallback_state: State(7), attributes: StateAttributes::ACCEPTING | StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { - transitions: smallvec![(&label, State(6))], + transitions: smallvec![(label, State(6))], fallback_state: State(5), attributes: StateAttributes::ACCEPTING | StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { - transitions: smallvec![(&label, State(2))], + transitions: smallvec![(label, State(2))], fallback_state: State(1), attributes: StateAttributes::ACCEPTING, }, StateTable { - transitions: smallvec![(&label, State(4))], + transitions: smallvec![(label, State(4))], fallback_state: State(3), attributes: StateAttributes::ACCEPTING, }, @@ -730,20 +820,29 @@ mod tests { fn child_and_descendant_test() { // Query = $.x..a.b.a.b.c..d let label_a = Label::new("a"); + let label_a = (&label_a).into(); + let label_b = Label::new("b"); + let label_b = (&label_b).into(); + let label_c = Label::new("c"); + let label_c = (&label_c).into(); + let label_d = Label::new("d"); + let label_d = (&label_d).into(); + let label_x = Label::new("x"); + let label_x = (&label_x).into(); let nfa = NondeterministicAutomaton { ordered_states: vec![ - NfaState::Direct(nfa::Transition::Labelled(&label_x)), - NfaState::Recursive(nfa::Transition::Labelled(&label_a)), - NfaState::Direct(nfa::Transition::Labelled(&label_b)), - NfaState::Direct(nfa::Transition::Labelled(&label_a)), - NfaState::Direct(nfa::Transition::Labelled(&label_b)), - NfaState::Direct(nfa::Transition::Labelled(&label_c)), - NfaState::Recursive(nfa::Transition::Labelled(&label_d)), + NfaState::Direct(nfa::Transition::Labelled(label_x)), + 
NfaState::Recursive(nfa::Transition::Labelled(label_a)), + NfaState::Direct(nfa::Transition::Labelled(label_b)), + NfaState::Direct(nfa::Transition::Labelled(label_a)), + NfaState::Direct(nfa::Transition::Labelled(label_b)), + NfaState::Direct(nfa::Transition::Labelled(label_c)), + NfaState::Recursive(nfa::Transition::Labelled(label_d)), NfaState::Accepting, ], }; @@ -757,42 +856,42 @@ mod tests { attributes: StateAttributes::REJECTING, }, StateTable { - transitions: smallvec![(&label_x, State(2))], + transitions: smallvec![(label_x, State(2))], fallback_state: State(0), attributes: StateAttributes::UNITARY, }, StateTable { - transitions: smallvec![(&label_a, State(3))], + transitions: smallvec![(label_a, State(3))], fallback_state: State(2), attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label_a, State(3)), (&label_b, State(4))], + transitions: smallvec![(label_a, State(3)), (label_b, State(4))], fallback_state: State(2), attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label_a, State(5))], + transitions: smallvec![(label_a, State(5))], fallback_state: State(2), attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label_a, State(3)), (&label_b, State(6))], + transitions: smallvec![(label_a, State(3)), (label_b, State(6))], fallback_state: State(2), attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label_a, State(5)), (&label_c, State(7))], + transitions: smallvec![(label_a, State(5)), (label_c, State(7))], fallback_state: State(2), attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label_d, State(8))], + transitions: smallvec![(label_d, State(8))], fallback_state: State(7), attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { - transitions: smallvec![(&label_d, State(8))], + transitions: smallvec![(label_d, State(8))], fallback_state: State(7), attributes: StateAttributes::ACCEPTING | 
StateAttributes::TRANSITIONS_TO_ACCEPTING, @@ -807,16 +906,21 @@ mod tests { fn child_descendant_and_child_wildcard_test() { // Query = $.x.*..a.*.b let label_a = Label::new("a"); + let label_a = (&label_a).into(); + let label_b = Label::new("b"); + let label_b = (&label_b).into(); + let label_x = Label::new("x"); + let label_x = (&label_x).into(); let nfa = NondeterministicAutomaton { ordered_states: vec![ - NfaState::Direct(nfa::Transition::Labelled(&label_x)), + NfaState::Direct(nfa::Transition::Labelled(label_x)), NfaState::Direct(nfa::Transition::Wildcard), - NfaState::Recursive(nfa::Transition::Labelled(&label_a)), + NfaState::Recursive(nfa::Transition::Labelled(label_a)), NfaState::Direct(nfa::Transition::Wildcard), - NfaState::Direct(nfa::Transition::Labelled(&label_b)), + NfaState::Direct(nfa::Transition::Labelled(label_b)), NfaState::Accepting, ], }; @@ -830,7 +934,7 @@ mod tests { attributes: StateAttributes::REJECTING, }, StateTable { - transitions: smallvec![(&label_x, State(2))], + transitions: smallvec![(label_x, State(2))], fallback_state: State(0), attributes: StateAttributes::UNITARY, }, @@ -840,33 +944,33 @@ mod tests { attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label_a, State(4))], + transitions: smallvec![(label_a, State(4))], fallback_state: State(3), attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label_a, State(6))], + transitions: smallvec![(label_a, State(6))], fallback_state: State(5), attributes: StateAttributes::EMPTY, }, StateTable { - transitions: smallvec![(&label_a, State(4)), (&label_b, State(8))], + transitions: smallvec![(label_a, State(4)), (label_b, State(8))], fallback_state: State(3), attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { - transitions: smallvec![(&label_a, State(6)), (&label_b, State(7))], + transitions: smallvec![(label_a, State(6)), (label_b, State(7))], fallback_state: State(5), attributes: 
StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { - transitions: smallvec![(&label_a, State(4)), (&label_b, State(8))], + transitions: smallvec![(label_a, State(4)), (label_b, State(8))], fallback_state: State(3), attributes: StateAttributes::ACCEPTING | StateAttributes::TRANSITIONS_TO_ACCEPTING, }, StateTable { - transitions: smallvec![(&label_a, State(4))], + transitions: smallvec![(label_a, State(4))], fallback_state: State(3), attributes: StateAttributes::ACCEPTING, }, diff --git a/crates/rsonpath-lib/src/query/automaton/nfa.rs b/crates/rsonpath-lib/src/query/automaton/nfa.rs index 538e0982..721112d9 100644 --- a/crates/rsonpath-lib/src/query/automaton/nfa.rs +++ b/crates/rsonpath-lib/src/query/automaton/nfa.rs @@ -1,10 +1,8 @@ //! Definition of a nondeterministic automaton that can be directly //! obtained from a JsonPath query. This is then turned into //! a DFA with the minimizer. -use crate::{ - error::UnsupportedFeatureError, - query::{error::CompilerError, JsonPathQuery, JsonPathQueryNode, JsonPathQueryNodeType, Label}, -}; +use super::TransitionLabel; +use crate::query::{error::CompilerError, JsonPathQuery, JsonPathQueryNode, JsonPathQueryNodeType}; use std::{fmt::Display, ops::Index}; /// An NFA representing a query. It is always a directed path @@ -32,7 +30,7 @@ use NfaState::*; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub(super) enum Transition<'q> { /// A transition matching a specific [`Label`] only. - Labelled(&'q Label), + Labelled(TransitionLabel<'q>), /// A transition matching anything. 
Wildcard, } @@ -71,14 +69,19 @@ impl<'q> NondeterministicAutomaton<'q> { .filter_map(|node| match node { JsonPathQueryNode::Root(_) => None, JsonPathQueryNode::Descendant(label, _) => { - Some(Ok(Recursive(Transition::Labelled(label)))) + Some(Ok(Recursive(Transition::Labelled(label.into())))) + } + JsonPathQueryNode::Child(label, _) => { + Some(Ok(Direct(Transition::Labelled(label.into())))) } - JsonPathQueryNode::Child(label, _) => Some(Ok(Direct(Transition::Labelled(label)))), JsonPathQueryNode::AnyChild(_) => Some(Ok(Direct(Transition::Wildcard))), JsonPathQueryNode::AnyDescendant(_) => Some(Ok(Recursive(Transition::Wildcard))), - JsonPathQueryNode::ArrayIndex(_, _) => Some(Err(CompilerError::NotSupported( - UnsupportedFeatureError::array_index(), - ))), + JsonPathQueryNode::ArrayIndexChild(index, _) => { + Some(Ok(Direct(Transition::Labelled((*index).into())))) + } + JsonPathQueryNode::ArrayIndexDescendant(index, _) => { + Some(Ok(Recursive(Transition::Labelled((*index).into())))) + } }) .collect(); let mut states = states_result?; @@ -132,24 +135,24 @@ impl<'q> Display for NondeterministicAutomaton<'q> { for (i, state) in self.ordered_states.iter().enumerate() { match state { Direct(Transition::Labelled(label)) => { - writeln!(f, "s{i}.{} -> s{};", label.display(), i + 1)?; + writeln!(f, "s{i}.{} -> s{};", label, i + 1)?; } Direct(Transition::Wildcard) => { for label in &all_labels { - writeln!(f, "s{i}.{} -> s{};", label.display(), i + 1)?; + writeln!(f, "s{i}.{} -> s{};", label, i + 1)?; } writeln!(f, "s{i}.X -> s{};", i + 1)?; } Recursive(Transition::Labelled(label)) => { - writeln!(f, "s{i}.{} -> s{i}, s{};", label.display(), i + 1)?; + writeln!(f, "s{i}.{} -> s{i}, s{};", label, i + 1)?; for label in all_labels.iter().filter(|&l| l != label) { - writeln!(f, "s{i}.{} -> s{i};", label.display())?; + writeln!(f, "s{i}.{} -> s{i};", label)?; } writeln!(f, "s{i}.X -> s{i};")?; } Recursive(Transition::Wildcard) => { for label in &all_labels { - writeln!(f, 
"s{i}.{} -> s{i}, s{};", label.display(), i + 1)?; + writeln!(f, "s{i}.{} -> s{i}, s{};", label, i + 1)?; } writeln!(f, "s{i}.X -> s{i}, s{};", i + 1)?; } diff --git a/crates/rsonpath-lib/src/query/automaton/state.rs b/crates/rsonpath-lib/src/query/automaton/state.rs index a73b14e8..682caa45 100644 --- a/crates/rsonpath-lib/src/query/automaton/state.rs +++ b/crates/rsonpath-lib/src/query/automaton/state.rs @@ -1,4 +1,4 @@ -//! Derfinition of [`State`] and DFA-state attributes giving details +//! Definition of [`State`] and DFA-state attributes giving details //! about the state's properties. use std::{fmt::Display, ops::BitOr}; diff --git a/crates/rsonpath-lib/src/query/builder.rs b/crates/rsonpath-lib/src/query/builder.rs index 56b660fc..c440b8d1 100644 --- a/crates/rsonpath-lib/src/query/builder.rs +++ b/crates/rsonpath-lib/src/query/builder.rs @@ -51,8 +51,16 @@ impl JsonPathQueryBuilder { /// Add a child selector with a given index. #[must_use] #[inline(always)] - pub fn array_index(mut self, index: NonNegativeArrayIndex) -> Self { - self.nodes.push(NodeTemplate::ArrayIndex(index)); + pub fn array_index_child(mut self, index: NonNegativeArrayIndex) -> Self { + self.nodes.push(NodeTemplate::ArrayIndexChild(index)); + self + } + + /// Add a descendant selector with a given index. 
+ #[must_use] + #[inline(always)] + pub fn array_index_descendant(mut self, index: NonNegativeArrayIndex) -> Self { + self.nodes.push(NodeTemplate::ArrayIndexDescendant(index)); self } @@ -88,8 +96,11 @@ impl JsonPathQueryBuilder { for node in self.nodes.into_iter().rev() { last = match node { - NodeTemplate::ArrayIndex(i) => { - Some(Box::new(JsonPathQueryNode::ArrayIndex(i, last))) + NodeTemplate::ArrayIndexChild(i) => { + Some(Box::new(JsonPathQueryNode::ArrayIndexChild(i, last))) + } + NodeTemplate::ArrayIndexDescendant(i) => { + Some(Box::new(JsonPathQueryNode::ArrayIndexDescendant(i, last))) } NodeTemplate::Child(label) => Some(Box::new(JsonPathQueryNode::Child(label, last))), NodeTemplate::AnyChild => Some(Box::new(JsonPathQueryNode::AnyChild(last))), @@ -124,7 +135,8 @@ impl From for JsonPathQuery { enum NodeTemplate { Child(Label), - ArrayIndex(NonNegativeArrayIndex), + ArrayIndexChild(NonNegativeArrayIndex), + ArrayIndexDescendant(NonNegativeArrayIndex), AnyChild, AnyDescendant, Descendant(Label), diff --git a/crates/rsonpath-lib/src/query/error.rs b/crates/rsonpath-lib/src/query/error.rs index 49a2031a..5c6b8a2e 100644 --- a/crates/rsonpath-lib/src/query/error.rs +++ b/crates/rsonpath-lib/src/query/error.rs @@ -33,14 +33,13 @@ //! _ => unreachable!(), //! } //! ``` +use super::NonNegativeArrayIndex; use std::{ fmt::{self, Display}, num::TryFromIntError, }; use thiserror::Error; -use super::ARRAY_INDEX_ULIMIT; - /// Errors raised by the query parser. #[derive(Debug, Error)] pub enum ParserError { @@ -75,7 +74,7 @@ pub enum ArrayIndexError { /// A value in excess of the permitted size was specified. 
#[error( "Array index {0} exceeds maximum specification value of {}.", - ARRAY_INDEX_ULIMIT + NonNegativeArrayIndex::MAX )] ExceedsUpperLimitError(String), diff --git a/crates/rsonpath-lib/src/query/nonnegative_array_index.rs b/crates/rsonpath-lib/src/query/nonnegative_array_index.rs new file mode 100644 index 00000000..32c2c5c5 --- /dev/null +++ b/crates/rsonpath-lib/src/query/nonnegative_array_index.rs @@ -0,0 +1,104 @@ +use super::error::ArrayIndexError; +use std::fmt::{self, Display, Formatter}; + +/// Array index to search for in a JSON document. +/// +/// Represents a specific location from the front of the list in a json array. +/// Provides the [IETF-conforming index value](https://www.rfc-editor.org/rfc/rfc7493.html#section-2). Values are \[0, (2^53)-1]. +/// # Examples +/// +/// ``` +/// # use rsonpath_lib::query::NonNegativeArrayIndex; +/// +/// let idx = NonNegativeArrayIndex::new(2); +/// +/// assert_eq!(idx.get_index(), 2); +/// ``` +#[derive(Clone, Copy, PartialEq, Eq, Debug, PartialOrd, Ord)] +pub struct NonNegativeArrayIndex(u64); + +/// The upper inclusive bound on index values. +const ARRAY_INDEX_ULIMIT: u64 = (1 << 53) - 1; + +impl TryFrom for NonNegativeArrayIndex { + type Error = ArrayIndexError; + + #[inline] + fn try_from(value: u64) -> Result { + if value > ARRAY_INDEX_ULIMIT { + Err(ArrayIndexError::ExceedsUpperLimitError(value.to_string())) + } else { + Ok(Self(value)) + } + } +} + +impl NonNegativeArrayIndex { + /// A constant index for the common and starting case of the first item. + pub const ZERO: Self = Self::new(0); + /// A constant index for the largest addressable index. + pub const MAX: Self = Self::new(ARRAY_INDEX_ULIMIT); + + /// Create a new search index from a u64. + #[must_use] + #[inline] + pub const fn new(index: u64) -> Self { + Self(index) + } + + /// Increment the index by one. 
+ /// # Errors + /// Will return `Err` if the increment causes the [`NonNegativeArrayIndex`] to exceed the addressable IETF-conforming index value. + #[inline] + pub fn try_increment(&mut self) -> Result<(), ArrayIndexError> { + let new_index = self.0 + 1; + if new_index <= ARRAY_INDEX_ULIMIT { + self.0 = new_index; + Ok(()) + } else { + Err(ArrayIndexError::ExceedsUpperLimitError( + new_index.to_string(), + )) + } + } + + /// Return the index stored. + #[must_use] + #[inline] + pub const fn get_index(&self) -> u64 { + self.0 + } +} + +impl From for u64 { + #[inline(always)] + fn from(val: NonNegativeArrayIndex) -> Self { + val.0 + } +} + +impl Display for NonNegativeArrayIndex { + #[inline] + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +#[cfg(test)] +mod tests { + use super::{NonNegativeArrayIndex, ARRAY_INDEX_ULIMIT}; + + #[test] + fn index_ulimit_sanity_check() { + assert_eq!(9_007_199_254_740_991, ARRAY_INDEX_ULIMIT); + } + + #[test] + fn index_ulimit_parse_check() { + NonNegativeArrayIndex::try_from(ARRAY_INDEX_ULIMIT) + .expect("Array index ulimit should be convertible."); + + NonNegativeArrayIndex::try_from(ARRAY_INDEX_ULIMIT + 1) + .expect_err("Values in excess of array index ulimit should not be convertible."); + } +} diff --git a/crates/rsonpath-lib/src/query/parser.rs b/crates/rsonpath-lib/src/query/parser.rs index 2d1f28bd..5d027de7 100644 --- a/crates/rsonpath-lib/src/query/parser.rs +++ b/crates/rsonpath-lib/src/query/parser.rs @@ -1,5 +1,4 @@ use super::error::{ArrayIndexError, ParseErrorReport, ParserError}; -use super::ARRAY_INDEX_ULIMIT; use crate::debug; use crate::query::{ JsonPathQuery, JsonPathQueryNode, JsonPathQueryNodeType, Label, NonNegativeArrayIndex, @@ -14,9 +13,10 @@ use std::fmt::{self, Display}; enum Token<'a> { Root, Child(LabelString<'a>), - ArrayIndex(NonNegativeArrayIndex), + ArrayIndexChild(NonNegativeArrayIndex), WildcardChild(), Descendant(LabelString<'a>), + 
ArrayIndexDescendant(NonNegativeArrayIndex), WildcardDescendant(), } @@ -31,10 +31,11 @@ impl Display for Token<'_> { match self { Token::Root => write!(f, "$"), Token::Child(label) => write!(f, "['{label}']"), - Token::ArrayIndex(i) => write!(f, "[{i}]"), + Token::ArrayIndexChild(i) => write!(f, "[{i}]"), Token::WildcardChild() => write!(f, "[*]"), Token::Descendant(label) => write!(f, "..['{label}']"), Token::WildcardDescendant() => write!(f, "..[*]"), + Token::ArrayIndexDescendant(i) => write!(f, "..[{i}]"), } } } @@ -127,12 +128,17 @@ fn tokens_to_node<'a, I: Iterator>>( Label::new(label.borrow()), child_node, ))), - Token::ArrayIndex(i) => Ok(Some(JsonPathQueryNode::ArrayIndex(i, child_node))), + Token::ArrayIndexChild(i) => { + Ok(Some(JsonPathQueryNode::ArrayIndexChild(i, child_node))) + } Token::WildcardChild() => Ok(Some(JsonPathQueryNode::AnyChild(child_node))), Token::Descendant(label) => Ok(Some(JsonPathQueryNode::Descendant( Label::new(label.borrow()), child_node, ))), + Token::ArrayIndexDescendant(i) => { + Ok(Some(JsonPathQueryNode::ArrayIndexDescendant(i, child_node))) + } Token::WildcardDescendant() => { Ok(Some(JsonPathQueryNode::AnyDescendant(child_node))) } @@ -194,9 +200,12 @@ fn dot_wildcard_selector<'a>() -> impl Parser<'a, char> { } fn descendant_selector<'a>() -> impl Parser<'a, Token<'a>> { - map( - preceded(tag(".."), alt((label(), index_selector()))), - Token::Descendant, + preceded( + tag(".."), + alt(( + map(alt((label(), index_selector())), Token::Descendant), + array_index_descendant_selector(), + )), ) } @@ -233,7 +242,11 @@ fn label_character<'a>() -> impl Parser<'a, char> { } fn array_index_child_selector<'a>() -> impl Parser<'a, Token<'a>> { - map(array_index_selector(), Token::ArrayIndex) + map(array_index_selector(), Token::ArrayIndexChild) +} + +fn array_index_descendant_selector<'a>() -> impl Parser<'a, Token<'a>> { + map(array_index_selector(), Token::ArrayIndexDescendant) } fn array_index_selector<'a>() -> impl Parser<'a, 
NonNegativeArrayIndex> { @@ -248,7 +261,8 @@ fn parsed_array_index<'a>() -> impl Parser<'a, u64> { map_res(length_limited_array_index(), str::parse) } -const ARRAY_INDEX_ULIMIT_BASE_10_DIGIT_COUNT: usize = ARRAY_INDEX_ULIMIT.ilog10() as usize; +const ARRAY_INDEX_ULIMIT_BASE_10_DIGIT_COUNT: usize = + NonNegativeArrayIndex::MAX.get_index().ilog10() as usize; fn length_limited_array_index<'a>() -> impl Parser<'a, &'a str> { map_res(digit1, |cs: &str| { if cs.len() > (ARRAY_INDEX_ULIMIT_BASE_10_DIGIT_COUNT + 1) { diff --git a/crates/rsonpath-lib/tests/data/basic/array_root_nested.json b/crates/rsonpath-lib/tests/data/basic/array_root_nested.json new file mode 100644 index 00000000..8035ca22 --- /dev/null +++ b/crates/rsonpath-lib/tests/data/basic/array_root_nested.json @@ -0,0 +1,23 @@ +[ + [ + [0] + ], + [], + [ + [], + [ + [ + [], + 0 + ], + [ + [], + 0 + ], + [ + [], + 0 + ] + ] + ] +] \ No newline at end of file diff --git a/crates/rsonpath-lib/tests/data/basic/array_root_populated.json b/crates/rsonpath-lib/tests/data/basic/array_root_populated.json new file mode 100644 index 00000000..3a26a2e5 --- /dev/null +++ b/crates/rsonpath-lib/tests/data/basic/array_root_populated.json @@ -0,0 +1 @@ +[1,2,3] \ No newline at end of file diff --git a/crates/rsonpath-lib/tests/data/basic/array_root_singleton.json b/crates/rsonpath-lib/tests/data/basic/array_root_singleton.json new file mode 100644 index 00000000..7660873d --- /dev/null +++ b/crates/rsonpath-lib/tests/data/basic/array_root_singleton.json @@ -0,0 +1 @@ +[1] diff --git a/crates/rsonpath-lib/tests/engine_correctness_tests.rs b/crates/rsonpath-lib/tests/engine_correctness_tests.rs index 8544f478..6af5f9cf 100644 --- a/crates/rsonpath-lib/tests/engine_correctness_tests.rs +++ b/crates/rsonpath-lib/tests/engine_correctness_tests.rs @@ -21,9 +21,29 @@ macro_rules! 
count_test_cases { #[test_case("basic/atomic_descendant.json", "$..a..b" => 0; "atomic_descendant.json $..a..b")] #[test_case("basic/atomic_descendant.json", "$..*..b" => 1; "atomic_descendant.json any descendant $..*..b")] #[test_case("basic/atomic_descendant.json", "$..*" => 4; "atomic_descendant.json any descendant $..*")] + #[test_case("basic/atomic_descendant.json", "$.b[0]" => 1; "atomic_descendant.json nneg array index")] + #[test_case("basic/atomic_descendant.json", "$.b[1]" => 0; "atomic_descendant.json nonexistent nneg array index")] + #[test_case("basic/atomic_descendant.json", "$..[0]" => 1; "atomic_descendant.json descendant nneg array index")] + #[test_case("basic/atomic_descendant.json", "$.b[0].b" => 1; "atomic_descendant.json nested nneg array index")] #[test_case("basic/atomic_after_complex.json", "$.a..b" => 1; "atomic_after_complex.json $.a..b")] + #[test_case("basic/atomic_after_complex.json", "$.a[0]" => 1; "atomic_after_complex.json nneg array index")] + #[test_case("basic/atomic_after_complex.json", "$.a[0].c.d[2]" => 1; "atomic_after_complex.json nneg last array index")] + #[test_case("basic/atomic_after_complex.json", "$.a[0].c.d[3]" => 0; "atomic_after_complex.json nneg nonexistent array index")] + #[test_case("basic/atomic_after_complex.json", "$..*[2]" => 1; "atomic_after_complex.json any desc nneg nonexistent array index")] + #[test_case("basic/atomic_descendant.json", "$[1]" => 0; "atomic_descendant.json nneg nonexistent array index")] #[test_case("basic/array_root.json", "$" => 1; "array_root.json $")] #[test_case("basic/array_root.json", "" => 1; "array_root.json")] + #[test_case("basic/array_root_nested.json", "$[0]" => 1; "array_root_nested.json nneg array top")] + #[test_case("basic/array_root_nested.json", "$[0].*" => 1; "array_root_nested.json nneg array inner any child")] + #[test_case("basic/array_root_nested.json", "$.*[0]" => 2; "array_root_nested.json any nneg array inner")] + #[test_case("basic/array_root_nested.json", 
"$[2][1][0][1]" => 1; "array_root_nested.json any nneg array inner inner inner")] + #[test_case("basic/array_root_nested.json", "$[2][1].*" => 3; "array_root_nested.json any nneg array inner inner child")] + #[test_case("basic/array_root_nested.json", "$[2].*[1]" => 1; "array_root_nested.json nneg array child inner")] + #[test_case("basic/array_root_nested.json", "$[2][1]..*[1]" => 3; "array_root_nested.json nneg nneg anydesc nneg")] + #[test_case("basic/array_root_nested.json", "$[2]..*" => 11; "array_root_nested.json nneg array anydesc")] + #[test_case("basic/array_root_nested.json", "$..*[0]" => 7; "array_root_nested.json anydesc nneg array")] + #[test_case("basic/array_root_nested.json", "$[2][0]" => 1; "array_root_nested.json nneg array direct first")] + #[test_case("basic/array_root_nested.json", "$[2][1]" => 1; "array_root_nested.json nneg array direct second")] #[test_case("basic/child.json", "$..a..b.c..d" => 3; "child.json $..a..b.c..d")] #[test_case("basic/child_hell.json", "$..x..a.b.a.b.c" => 6; "child_hell.json $..x..a.b.a.b.c")] #[test_case("basic/empty.json", "" => 0; "empty.json")] @@ -159,6 +179,10 @@ macro_rules! count_test_cases { "wikidata/wikidata_properties.json", "$..*.value" => 132188; "wikidata_properties.json $..*.value (child) any desc" )] + #[test_case( + "wikidata/wikidata_properties.json", "$..*[5]" => 2511; + "wikidata_properties.json $..*[5] (child) any desc nneg array index" + )] #[test_case( "wikidata/wikidata_properties.json", "$..P7103.claims.P31..references..snaks.P4656..hash" => 1; "wikidata_properties.json $..P7103.claims.P31..references..snaks.P4656..hash" @@ -179,8 +203,12 @@ macro_rules! 
indices_test_cases { #[test_case("basic/atomic_descendant.json", "$..*" => vec![9,24,34,51]; "atomic_descendant.json any descendant $..*")] #[test_case("basic/atomic_descendant.json", "$..a..b" => Vec::::new(); "atomic_descendant.json $..a..b")] #[test_case("basic/atomic_after_complex.json", "$.a..b" => vec![174]; "atomic_after_complex.json $.a..b")] + #[test_case("basic/atomic_after_complex.json", "$..d[2]" => vec![111]; "atomic_after_complex.json named desc nneg nonexistent array index")] + #[test_case("basic/atomic_after_complex.json", "$..*[2]" => vec![111]; "atomic_after_complex.json any desc nneg nonexistent array index")] #[test_case("basic/array_root.json", "$" => vec![0]; "array_root.json $")] #[test_case("basic/array_root.json", "" => vec![0]; "array_root.json")] + #[test_case("basic/array_root.json", "$[0]" => Vec::::new(); "array_root.json nneg array index simple")] + #[test_case("basic/array_root_singleton.json", "$[0]" => vec![1]; "array_root_singleton.json nneg array index simple")] #[test_case("basic/child.json", "$..a..b.c..d" => vec![984, 1297, 1545]; "child.json $..a..b.c..d")] #[test_case("basic/child_hell.json", "$..x..a.b.a.b.c" => vec![198, 756, 1227, 1903, 2040, 2207]; "child_hell.json $..x..a.b.a.b.c")] #[test_case("basic/empty.json", "" => Vec::::new(); "empty.json")] @@ -230,6 +258,11 @@ macro_rules! 
indices_test_cases { #[test_case("basic/compressed/singletons_and_empties.json", r#"$.*.*"# => vec![6, 15]; "compressed singletons_and_empties.json")] #[test_case("basic/compressed/skipping.json", r#"$.a.b"# => vec![452]; "compressed skipping")] #[test_case("basic/compressed/small_no_list.json", "$..person..phoneNumber..number" => vec![176, 380]; "compressed small_no_list.json $..person..phoneNumber..number")] + #[test_case("basic/small.json", "$..person..*[1].type" => vec![402, 1028]; "small.json nneg array $..person..*[1].type")] + #[test_case("basic/compressed/small.json", "$..person..*[1].type" => vec![203, 451]; "compressed anydesc small.json $..person..*[1].type")] + #[test_case("basic/compressed/small.json", "$..person..[1].type" => vec![203, 451]; "compressed nneg array small.json $..person..[1].type nneg")] + #[test_case("basic/compressed/small.json", "$..person.phoneNumber[1].type" => vec![203, 451]; "compressed nneg array direct small.json $..person..[1].type")] + #[test_case("basic/compressed/small.json", "$..person.phoneNumber[0].type" => vec![159, 407]; "compressed nneg array direct small.json $..person..[0].type")] #[test_case("basic/compressed/small.json", "$..person..phoneNumber..number" => vec![177, 219, 425, 467]; "compressed small.json $..person..phoneNumber..number")] #[test_case( "twitter/compressed/twitter.json", @@ -242,11 +275,26 @@ macro_rules! 
indices_test_cases { => vec![3487, 9835, 12717, 52573, 64602, 77996, 119306, 121917, 201072, 212697, 215342, 241825, 288268, 310029, 312971, 445430, 454459, 464575]; "compressed twitter.json $..user..entities.url (child)")] #[test_case("twitter/compressed/twitter_urls.json", "$..entities..urls..url" => vec![145, 326]; "compressed twitter_urls.json $..entities..urls..url")] + #[test_case("twitter/compressed/twitter_urls.json", "$..[0]" => vec![1, 139, 183, 249, 320, 364]; "compressed twitter_urls.json nneg array first descendent")] + #[test_case("twitter/compressed/twitter_urls.json", "$[0]" => vec![1]; "compressed twitter_urls.json nneg array first root only")] #[test_case("twitter/compressed/twitter_urls.json", "$..entities.urls..url" => vec![145, 326]; "compressed twitter_urls.json $..entities.urls..url (child)")] #[test_case( "wikidata/wikidata_properties.json", "$..P7103.claims.P31..references..snaks.P4656..hash" => vec![22639033]; "wikidata_properties.json $..P7103.claims.P31..references..snaks.P4656..hash" )] + #[test_case("basic/array_root_nested.json", "$[0]" => vec![6]; "array_root_nested.json nneg array top")] + #[test_case("basic/array_root_nested.json", "$[0].*" => vec![16]; "array_root_nested.json nneg array inner any child")] + #[test_case("basic/array_root_nested.json", "$.*[0]" => vec![16,49]; "array_root_nested.json any nneg array inner")] + #[test_case("basic/array_root_nested.json", "$[2][1][0][1]" => vec![95]; "array_root_nested.json any nneg array inner inner inner")] + #[test_case("basic/array_root_nested.json", "$[2][1].*" => vec![75, 142, 209]; "array_root_nested.json any nneg array inner inner child")] + #[test_case("basic/array_root_nested.json", "$[2].*[1]" => vec![142]; "array_root_nested.json nneg array child inner")] + #[test_case("basic/array_root_nested.json", "$[2]..*[1]" => vec![95, 142, 162, 229]; "array_root_nested.json nneg array anydesc inner")] + #[test_case("basic/array_root_nested.json", "$[2][1].*[1]" => vec![95, 162, 229]; 
"array_root_nested.json nneg nneg any nneg")] + #[test_case("basic/array_root_nested.json", "$[2]..*" => vec![49, 61, 75, 93, 95, 142, 160, 162, 209, 227, 229]; "array_root_nested.json nneg array anydesc")] + #[test_case("basic/array_root_nested.json", "$..*[0]" => vec![16,17,49,75,93,160,227]; "array_root_nested.json anydesc nneg array first")] + #[test_case("basic/array_root_nested.json", "$..*[2]" => vec![209]; "array_root_nested.json anydesc nneg array third")] + #[test_case("basic/array_root_nested.json", "$[2][0]" => vec![49]; "array_root_nested.json nneg array direct first")] + #[test_case("basic/array_root_nested.json", "$[2][1]" => vec![61]; "array_root_nested.json nneg array direct second")] fn $test_name(test_path: &str, query_string: &str) -> Vec<usize> { let contents = get_contents(test_path); let query = JsonPathQuery::parse(query_string).unwrap(); diff --git a/crates/rsonpath-lib/tests/query_parser_tests.rs b/crates/rsonpath-lib/tests/query_parser_tests.rs index cd2d668a..40e9da34 100644 --- a/crates/rsonpath-lib/tests/query_parser_tests.rs +++ b/crates/rsonpath-lib/tests/query_parser_tests.rs @@ -35,11 +35,23 @@ fn wildcard_child_selector() { assert_eq!(result, expected_query); } +#[test] +fn descendant_nonnegative_array_indexed_selector() { + let input = "$..[5]"; + let expected_query = JsonPathQueryBuilder::new() + .array_index_descendant(5.try_into().unwrap()) + .into(); + + let result = JsonPathQuery::parse(input).expect("expected Ok"); + + assert_eq!(result, expected_query); +} + #[test] fn nonnegative_array_indexed_selector() { let input = "$[5]"; let expected_query = JsonPathQueryBuilder::new() - .array_index(5.try_into().unwrap()) + .array_index_child(5.try_into().unwrap()) .into(); let result = JsonPathQuery::parse(input).expect("expected Ok"); @@ -51,8 +63,8 @@ fn nonnegative_array_indexed_selector() { fn multiple_nonnegative_array_indexed_selector() { let input = "$[5][2]"; let expected_query = JsonPathQueryBuilder::new() - 
.array_index(5.try_into().unwrap()) - .array_index(2.try_into().unwrap()) + .array_index_child(5.try_into().unwrap()) + .array_index_child(2.try_into().unwrap()) .into(); let result = JsonPathQuery::parse(input).expect("expected Ok"); @@ -64,7 +76,7 @@ fn multiple_nonnegative_array_indexed_selector() { fn zeroth_array_indexed_selector() { let input = "$[0]"; let expected_query = JsonPathQueryBuilder::new() - .array_index(0.try_into().unwrap()) + .array_index_child(0.try_into().unwrap()) .into(); let result = JsonPathQuery::parse(input).expect("expected Ok");