From 3bf69bbf131144d12cb60f4b0cae0d53b87aec25 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Thu, 30 Nov 2023 09:55:32 +0000 Subject: [PATCH 01/29] feat(inverted_index.search): add fst applier Signed-off-by: Zhenchi --- Cargo.lock | 3 + Cargo.toml | 1 + src/index/Cargo.toml | 2 + src/index/src/inverted_index.rs | 4 + src/index/src/inverted_index/error.rs | 43 ++- src/index/src/inverted_index/format/reader.rs | 4 +- src/index/src/inverted_index/search.rs | 16 + .../src/inverted_index/search/fst_apply.rs | 32 ++ .../search/fst_apply/intersection_apply.rs | 325 ++++++++++++++++++ .../search/fst_apply/keys_apply.rs | 302 ++++++++++++++++ .../src/inverted_index/search/predicate.rs | 73 ++++ src/index/src/lib.rs | 2 + 12 files changed, 803 insertions(+), 4 deletions(-) create mode 100644 src/index/src/inverted_index/search.rs create mode 100644 src/index/src/inverted_index/search/fst_apply.rs create mode 100644 src/index/src/inverted_index/search/fst_apply/intersection_apply.rs create mode 100644 src/index/src/inverted_index/search/fst_apply/keys_apply.rs create mode 100644 src/index/src/inverted_index/search/predicate.rs diff --git a/Cargo.lock b/Cargo.lock index 099d0ead5708..bb991ff45c1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3920,6 +3920,8 @@ dependencies = [ "futures", "greptime-proto", "prost 0.12.2", + "regex", + "regex-automata 0.1.10", "snafu", "tokio", "tokio-util", @@ -6953,6 +6955,7 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" dependencies = [ + "fst", "regex-syntax 0.6.29", ] diff --git a/Cargo.toml b/Cargo.toml index 449e17298310..75e0ad565580 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -107,6 +107,7 @@ prost = "0.12" raft-engine = { git = "https://github.com/tikv/raft-engine.git", rev = "22dfb426cd994602b57725ef080287d3e53db479" } rand = "0.8" regex = "1.8" +regex-automata = { version = "0.1", features = ["transducer"] } reqwest = { version = "0.11", default-features = false, features = [ "json", "rustls-tls-native-roots", diff --git a/src/index/Cargo.toml b/src/index/Cargo.toml index 6d667336903d..54b8c75f7edc 100644 --- a/src/index/Cargo.toml +++ b/src/index/Cargo.toml @@ -13,6 +13,8 @@ fst.workspace = true futures.workspace = true greptime-proto.workspace = true prost.workspace = true +regex-automata.workspace = true +regex.workspace = true snafu.workspace = true [dev-dependencies] diff --git a/src/index/src/inverted_index.rs b/src/index/src/inverted_index.rs index 43a2234fdde3..96db32a0cb4f 100644 --- a/src/index/src/inverted_index.rs +++ b/src/index/src/inverted_index.rs @@ -14,3 +14,7 @@ pub mod error; pub mod format; +pub mod search; + +pub type FstMap = fst::Map>; +pub type Bytes = Vec; diff --git a/src/index/src/inverted_index/error.rs b/src/index/src/inverted_index/error.rs index e1c650a3637b..65658c28709f 100644 --- a/src/index/src/inverted_index/error.rs +++ b/src/index/src/inverted_index/error.rs @@ -20,6 +20,8 @@ use common_error::status_code::StatusCode; use common_macro::stack_trace_debug; use snafu::{Location, Snafu}; +use crate::inverted_index::search::predicate::Predicate; + #[derive(Snafu)] #[snafu(visibility(pub))] #[stack_trace_debug] @@ -75,6 +77,38 @@ pub enum Error { error: prost::DecodeError, location: Location, }, + + #[snafu(display("Failed to parse regex pattern: {pattern}"))] + ParseRegex { + #[snafu(source)] + error: regex::Error, + pattern: String, + location: Location, + }, + + #[snafu(display("Failed to parse regex DFA"))] + ParseDFA { + #[snafu(source)] + error: regex_automata::Error, + location: Location, + }, + + #[snafu(display("Unexpected empty predicates to construct fst applier"))] + EmptyPredicates { location: Location }, + + #[snafu(display("Failed to construct intersection fst applier with InList predicate"))] + IntersectionApplierWithInList { location: Location }, + + #[snafu(display("Failed to construct keys fst applier without InList predicate"))] + KeysApplierWithoutInList { location: Location }, + + #[snafu(display( + "Failed to construct keys fst applier with unexpected predicates: {predicates:?}" + ))] + KeysApplierUnexpectedPredicates { + location: Location, + predicates: Vec, + }, } impl ErrorExt for Error { @@ -87,7 +121,14 @@ impl ErrorExt for Error { | UnexpectedOffsetSize { .. } | UnexpectedBlobSize { .. } | DecodeProto { .. } - | DecodeFst { .. } => StatusCode::Unexpected, + | DecodeFst { .. } + | KeysApplierUnexpectedPredicates { .. } => StatusCode::Unexpected, + + ParseRegex { .. } + | ParseDFA { .. } + | KeysApplierWithoutInList { .. } + | IntersectionApplierWithInList { .. } + | EmptyPredicates { .. } => StatusCode::InvalidArguments, } } diff --git a/src/index/src/inverted_index/format/reader.rs b/src/index/src/inverted_index/format/reader.rs index 2fbe703798d4..c18c6bd83eb7 100644 --- a/src/index/src/inverted_index/format/reader.rs +++ b/src/index/src/inverted_index/format/reader.rs @@ -17,12 +17,10 @@ mod footer; use async_trait::async_trait; use common_base::BitVec; -use fst::Map; use greptime_proto::v1::index::{InvertedIndexMeta, InvertedIndexMetas}; use crate::inverted_index::error::Result; - -pub type FstMap = Map>; +use crate::inverted_index::FstMap; /// InvertedIndexReader defines an asynchronous reader of inverted index data #[async_trait] diff --git a/src/index/src/inverted_index/search.rs b/src/index/src/inverted_index/search.rs new file mode 100644 index 000000000000..d4f4b71f05c9 --- /dev/null +++ b/src/index/src/inverted_index/search.rs @@ -0,0 +1,16 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub mod fst_apply; +pub mod predicate; diff --git a/src/index/src/inverted_index/search/fst_apply.rs b/src/index/src/inverted_index/search/fst_apply.rs new file mode 100644 index 000000000000..32b71b2bf3d9 --- /dev/null +++ b/src/index/src/inverted_index/search/fst_apply.rs @@ -0,0 +1,32 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod intersection_apply; +mod keys_apply; + +pub use intersection_apply::IntersectionFstApplier; +pub use keys_apply::KeysFstApplier; + +use crate::inverted_index::FstMap; + +/// A trait for objects that can process a finite state transducer (FstMap) and return +/// associated values. +pub trait FstApplier: Send + Sync { + /// Retrieves values from an FstMap. + /// + /// * `fst`: A reference to the FstMap from which the values will be fetched. + /// + /// Returns a `Vec`, with each u64 being a value from the FstMap. + fn apply(&self, fst: &FstMap) -> Vec; +} diff --git a/src/index/src/inverted_index/search/fst_apply/intersection_apply.rs b/src/index/src/inverted_index/search/fst_apply/intersection_apply.rs new file mode 100644 index 000000000000..a0ae0d7b9afb --- /dev/null +++ b/src/index/src/inverted_index/search/fst_apply/intersection_apply.rs @@ -0,0 +1,325 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use fst::map::OpBuilder; +use fst::{IntoStreamer, Streamer}; +use regex_automata::DenseDFA; +use snafu::{ensure, ResultExt}; + +use crate::inverted_index::error::{ + EmptyPredicatesSnafu, IntersectionApplierWithInListSnafu, ParseDFASnafu, Result, +}; +use crate::inverted_index::search::fst_apply::FstApplier; +use crate::inverted_index::search::predicate::{Predicate, Range}; +use crate::inverted_index::FstMap; + +type Dfa = DenseDFA, usize>; + +/// `IntersectionFstApplier` applies intersection operations on an FstMap using specified ranges and regex patterns. +pub struct IntersectionFstApplier { + /// A list of `Range` which define inclusive or exclusive ranges for keys to be queried in the FstMap. + ranges: Vec, + + /// A list of `Dfa` compiled from regular expression patterns. + dfas: Vec, +} + +impl FstApplier for IntersectionFstApplier { + fn apply(&self, fst: &FstMap) -> Vec { + let mut op = OpBuilder::new(); + + for range in &self.ranges { + match (range.lower.as_ref(), range.upper.as_ref()) { + (Some(lower), Some(upper)) => match (lower.inclusive, upper.inclusive) { + (true, true) => op.push(fst.range().ge(&lower.value).le(&upper.value)), + (true, false) => op.push(fst.range().ge(&lower.value).lt(&upper.value)), + (false, true) => op.push(fst.range().gt(&lower.value).le(&upper.value)), + (false, false) => op.push(fst.range().gt(&lower.value).lt(&upper.value)), + }, + (Some(lower), None) => match lower.inclusive { + true => op.push(fst.range().ge(&lower.value)), + false => op.push(fst.range().gt(&lower.value)), + }, + (None, Some(upper)) => match upper.inclusive { + true => op.push(fst.range().le(&upper.value)), + false => op.push(fst.range().lt(&upper.value)), + }, + (None, None) => op.push(fst), + } + } + + for dfa in &self.dfas { + op.push(fst.search(dfa)); + } + + let mut stream = op.intersection().into_stream(); + let mut values = Vec::new(); + while let Some((_, v)) = stream.next() { + values.push(v[0].value) + } + values + } +} + +impl IntersectionFstApplier { + /// Attempts to create an `IntersectionFstApplier` from a list of `Predicate`. + /// + /// This function only accepts predicates of the variants `Range` and `RegexMatch`. + /// It does not accept `InList` predicates and will return an error if any are found. + /// `InList` predicates are handled by `KeysFstApplier`. + pub fn try_from(predicates: Vec) -> Result { + ensure!(!predicates.is_empty(), EmptyPredicatesSnafu); + + let mut dfas = Vec::with_capacity(predicates.len()); + let mut ranges = Vec::with_capacity(predicates.len()); + + for predicate in predicates { + match predicate { + Predicate::Range(range) => ranges.push(range.range), + Predicate::RegexMatch(regex) => { + let dfa = DenseDFA::new(®ex.pattern); + let dfa = dfa.context(ParseDFASnafu)?; + dfas.push(dfa); + } + // Rejection of `InList` predicates is enforced here. + Predicate::InList(_) => { + return IntersectionApplierWithInListSnafu.fail(); + } + } + } + + Ok(Self { dfas, ranges }) + } +} + +impl TryFrom> for IntersectionFstApplier { + type Error = crate::inverted_index::error::Error; + + fn try_from(predicates: Vec) -> Result { + Self::try_from(predicates) + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashSet; + + use super::*; + use crate::inverted_index::error::Error; + use crate::inverted_index::search::predicate::{ + Bound, InListPredicate, RangePredicate, RegexMatchPredicate, + }; + + fn create_applier_from_range(range: Range) -> Result { + IntersectionFstApplier::try_from(vec![Predicate::Range(RangePredicate { range })]) + } + + fn create_applier_from_pattern(pattern: &str) -> Result { + IntersectionFstApplier::try_from(vec![Predicate::RegexMatch(RegexMatchPredicate { + pattern: pattern.to_string(), + })]) + } + + #[test] + fn test_intersection_fst_applier_with_ranges() { + let test_fst = FstMap::from_iter([("aa", 1), ("bb", 2), ("cc", 3)]).unwrap(); + + let applier_inclusive_lower = create_applier_from_range(Range { + lower: Some(Bound { + value: b"bb".to_vec(), + inclusive: true, + }), + upper: None, + }) + .unwrap(); + let results = applier_inclusive_lower.apply(&test_fst); + assert_eq!(results, vec![2, 3]); + + let applier_exclusive_lower = create_applier_from_range(Range { + lower: Some(Bound { + value: b"bb".to_vec(), + inclusive: false, + }), + upper: None, + }) + .unwrap(); + let results = applier_exclusive_lower.apply(&test_fst); + assert_eq!(results, vec![3]); + + let applier_inclusive_upper = create_applier_from_range(Range { + lower: None, + upper: Some(Bound { + value: b"bb".to_vec(), + inclusive: true, + }), + }) + .unwrap(); + let results = applier_inclusive_upper.apply(&test_fst); + assert_eq!(results, vec![1, 2]); + + let applier_exclusive_upper = create_applier_from_range(Range { + lower: None, + upper: Some(Bound { + value: b"bb".to_vec(), + inclusive: false, + }), + }) + .unwrap(); + let results = applier_exclusive_upper.apply(&test_fst); + assert_eq!(results, vec![1]); + + let applier_inclusive_bounds = create_applier_from_range(Range { + lower: Some(Bound { + value: b"aa".to_vec(), + inclusive: true, + }), + upper: Some(Bound { + value: b"cc".to_vec(), + inclusive: true, + }), + }) + .unwrap(); + let results = applier_inclusive_bounds.apply(&test_fst); + assert_eq!(results, vec![1, 2, 3]); + + let applier_exclusive_bounds = create_applier_from_range(Range { + lower: Some(Bound { + value: b"aa".to_vec(), + inclusive: false, + }), + upper: Some(Bound { + value: b"cc".to_vec(), + inclusive: false, + }), + }) + .unwrap(); + let results = applier_exclusive_bounds.apply(&test_fst); + assert_eq!(results, vec![2]); + } + + #[test] + fn test_intersection_fst_applier_with_valid_pattern() { + let test_fst = FstMap::from_iter([("aa", 1), ("bb", 2), ("cc", 3)]).unwrap(); + + let applier = create_applier_from_pattern("a.?").unwrap(); + let results = applier.apply(&test_fst); + assert_eq!(results, vec![1]); + + let applier = create_applier_from_pattern("b.?").unwrap(); + let results = applier.apply(&test_fst); + assert_eq!(results, vec![2]); + + let applier = create_applier_from_pattern("c.?").unwrap(); + let results = applier.apply(&test_fst); + assert_eq!(results, vec![3]); + + let applier = create_applier_from_pattern("a.*").unwrap(); + let results = applier.apply(&test_fst); + assert_eq!(results, vec![1]); + + let applier = create_applier_from_pattern("b.*").unwrap(); + let results = applier.apply(&test_fst); + assert_eq!(results, vec![2]); + + let applier = create_applier_from_pattern("c.*").unwrap(); + let results = applier.apply(&test_fst); + assert_eq!(results, vec![3]); + + let applier = create_applier_from_pattern("d.?").unwrap(); + let results = applier.apply(&test_fst); + assert!(results.is_empty()); + + let applier = create_applier_from_pattern("a.?|b.?").unwrap(); + let results = applier.apply(&test_fst); + assert_eq!(results, vec![1, 2]); + + let applier = create_applier_from_pattern("d.?|a.?").unwrap(); + let results = applier.apply(&test_fst); + assert_eq!(results, vec![1]); + + let applier = create_applier_from_pattern(".*").unwrap(); + let results = applier.apply(&test_fst); + assert_eq!(results, vec![1, 2, 3]); + } + + #[test] + fn test_intersection_fst_applier_with_composite_predicates() { + let test_fst = FstMap::from_iter([("aa", 1), ("bb", 2), ("cc", 3)]).unwrap(); + + let applier = IntersectionFstApplier::try_from(vec![ + Predicate::Range(RangePredicate { + range: Range { + lower: Some(Bound { + value: b"aa".to_vec(), + inclusive: true, + }), + upper: Some(Bound { + value: b"cc".to_vec(), + inclusive: true, + }), + }, + }), + Predicate::RegexMatch(RegexMatchPredicate { + pattern: "a.?".to_string(), + }), + ]) + .unwrap(); + let results = applier.apply(&test_fst); + assert_eq!(results, vec![1]); + + let applier = IntersectionFstApplier::try_from(vec![ + Predicate::Range(RangePredicate { + range: Range { + lower: Some(Bound { + value: b"aa".to_vec(), + inclusive: false, + }), + upper: Some(Bound { + value: b"cc".to_vec(), + inclusive: true, + }), + }, + }), + Predicate::RegexMatch(RegexMatchPredicate { + pattern: "a.?".to_string(), + }), + ]) + .unwrap(); + let results = applier.apply(&test_fst); + assert!(results.is_empty()); + } + + #[test] + fn test_intersection_fst_applier_with_invalid_pattern() { + let result = create_applier_from_pattern("a("); + assert!(matches!(result, Err(Error::ParseDFA { .. }))); + } + + #[test] + fn test_intersection_fst_applier_with_empty_predicates() { + let result = IntersectionFstApplier::try_from(vec![]); + assert!(matches!(result, Err(Error::EmptyPredicates { .. }))); + } + + #[test] + fn test_intersection_fst_applier_with_in_list_predicate() { + let result = IntersectionFstApplier::try_from(vec![Predicate::InList(InListPredicate { + list: HashSet::from_iter([b"one".to_vec(), b"two".to_vec()]), + })]); + assert!(matches!( + result, + Err(Error::IntersectionApplierWithInList { .. }) + )); + } +} diff --git a/src/index/src/inverted_index/search/fst_apply/keys_apply.rs b/src/index/src/inverted_index/search/fst_apply/keys_apply.rs new file mode 100644 index 000000000000..8484248d74f7 --- /dev/null +++ b/src/index/src/inverted_index/search/fst_apply/keys_apply.rs @@ -0,0 +1,302 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashSet; + +use snafu::{ensure, ResultExt}; + +use crate::inverted_index::error::{ + EmptyPredicatesSnafu, KeysApplierUnexpectedPredicatesSnafu, KeysApplierWithoutInListSnafu, + ParseRegexSnafu, Result, +}; +use crate::inverted_index::search::fst_apply::FstApplier; +use crate::inverted_index::search::predicate::Predicate; +use crate::inverted_index::{Bytes, FstMap}; + +/// `KeysFstApplier` is responsible for applying a search using a set of predefined keys +/// against an FstMap to fetch associated values. +pub struct KeysFstApplier { + /// A list of keys to be fetched directly from the FstMap. + keys: Vec, +} + +impl FstApplier for KeysFstApplier { + fn apply(&self, fst: &FstMap) -> Vec { + self.keys.iter().filter_map(|k| fst.get(k)).collect() + } +} + +impl KeysFstApplier { + /// Tries to create a `KeysFstApplier` from a list of predicates. + /// + /// This function constructs the applier by intersecting keys from one or more `InList` predicates, + /// which are required. It then optionally refines this set using any additional `Range` and `RegexMatch` + /// predicates provided. + pub fn try_from(mut predicates: Vec) -> Result { + ensure!(!predicates.is_empty(), EmptyPredicatesSnafu); + + let (in_lists, others) = Self::split_at_in_lists(&mut predicates); + let (ranges, regexes) = Self::split_at_ranges(others); + Self::ensure_all_regexes(regexes)?; + + ensure!(!in_lists.is_empty(), KeysApplierWithoutInListSnafu); + let intersected_keys = Self::intersect_with_lists(in_lists); + let range_matched_keys = Self::filter_by_ranges(intersected_keys, ranges); + let regex_matched_keys = Self::filter_by_regexes(range_matched_keys, regexes)?; + + Ok(Self { + keys: regex_matched_keys, + }) + } + + fn split_at_in_lists(predicates: &mut [Predicate]) -> (&mut [Predicate], &mut [Predicate]) { + let in_list_index = predicates + .iter_mut() + .partition_in_place(|p| matches!(p, Predicate::InList(_))); + predicates.split_at_mut(in_list_index) + } + + fn split_at_ranges(predicates: &mut [Predicate]) -> (&mut [Predicate], &mut [Predicate]) { + let range_index = predicates + .iter_mut() + .partition_in_place(|p| matches!(p, Predicate::Range(_))); + predicates.split_at_mut(range_index) + } + + fn ensure_all_regexes(ps: &[Predicate]) -> Result<()> { + ensure!( + ps.iter().all(|p| matches!(p, Predicate::RegexMatch(_))), + KeysApplierUnexpectedPredicatesSnafu { + predicates: ps.to_vec() + } + ); + Ok(()) + } + + fn intersect_with_lists(in_lists: &mut [Predicate]) -> Vec { + #[inline] + fn get_list(p: &Predicate) -> &HashSet { + match p { + Predicate::InList(i) => &i.list, + _ => unreachable!(), // `in_lists` is filtered by `split_at_in_lists + } + } + + in_lists.sort_unstable_by_key(|p| get_list(p).len()); + get_list(&in_lists[0]) + .iter() + .filter(|c| in_lists[1..].iter().all(|s| get_list(s).contains(*c))) + .cloned() + .collect() + } + + fn filter_by_ranges(mut keys: Vec, ranges: &[Predicate]) -> Vec { + #[inline] + fn range_contains(p: &Predicate, key: &Bytes) -> bool { + let (lower, upper) = match p { + Predicate::Range(r) => (&r.range.lower, &r.range.upper), + _ => unreachable!(), // `ranges` is filtered by `split_at_ranges` + }; + + match (lower, upper) { + (Some(lower), Some(upper)) => match (lower.inclusive, upper.inclusive) { + (true, true) => &lower.value <= key && key <= &upper.value, + (true, false) => &lower.value <= key && key < &upper.value, + (false, true) => &lower.value < key && key <= &upper.value, + (false, false) => &lower.value < key && key < &upper.value, + }, + (Some(lower), None) => match lower.inclusive { + true => &lower.value <= key, + false => &lower.value < key, + }, + (None, Some(upper)) => match upper.inclusive { + true => key <= &upper.value, + false => key < &upper.value, + }, + (None, None) => true, + } + } + + keys.retain(|k| ranges.iter().all(|r| range_contains(r, k))); + keys + } + + fn filter_by_regexes(mut keys: Vec, regexes: &[Predicate]) -> Result> { + for p in regexes { + let pattern = match p { + Predicate::RegexMatch(r) => &r.pattern, + _ => unreachable!(), // checked by `ensure_all_regexes` + }; + + let regex = regex::Regex::new(pattern).with_context(|_| ParseRegexSnafu { + pattern: pattern.to_owned(), + })?; + + keys.retain(|k| { + std::str::from_utf8(k) + .map(|k| regex.is_match(k)) + .unwrap_or_default() + }); + if keys.is_empty() { + return Ok(keys); + } + } + + Ok(keys) + } +} + +impl TryFrom> for KeysFstApplier { + type Error = crate::inverted_index::error::Error; + fn try_from(predicates: Vec) -> Result { + Self::try_from(predicates) + } +} + +#[cfg(test)] +mod tests { + use fst::Map as FstMap; + + use super::*; + use crate::inverted_index::error::Error; + use crate::inverted_index::search::predicate::{ + Bound, InListPredicate, Predicate, Range, RangePredicate, RegexMatchPredicate, + }; + + fn create_fst_map(items: &[(&[u8], u64)]) -> FstMap> { + let mut items = items + .iter() + .map(|(k, v)| (k.to_vec(), *v)) + .collect::>(); + items.sort(); + FstMap::from_iter(items).unwrap() + } + + fn b(s: &str) -> Vec { + s.as_bytes().to_vec() + } + + #[test] + fn test_keys_fst_applier_apply() { + let test_fst = create_fst_map(&[(b"foo", 1), (b"bar", 2), (b"baz", 3)]); + let applier = KeysFstApplier { + keys: vec![b("foo"), b("baz")], + }; + + let results = applier.apply(&test_fst); + assert_eq!(results, vec![1, 3]); + } + + #[test] + fn test_keys_fst_applier_with_empty_keys() { + let test_fst = create_fst_map(&[(b"foo", 1), (b"bar", 2), (b"baz", 3)]); + let applier = KeysFstApplier { keys: vec![] }; + + let results = applier.apply(&test_fst); + assert!(results.is_empty()); + } + + #[test] + fn test_keys_fst_applier_with_unmatched_keys() { + let test_fst = create_fst_map(&[(b"foo", 1), (b"bar", 2), (b"baz", 3)]); + let applier = KeysFstApplier { + keys: vec![b("qux"), b("quux")], + }; + + let results = applier.apply(&test_fst); + assert!(results.is_empty()); + } + + #[test] + fn test_keys_fst_applier_try_from() { + let predicates = vec![ + Predicate::InList(InListPredicate { + list: HashSet::from_iter(vec![b("foo"), b("bar")]), + }), + Predicate::Range(RangePredicate { + range: Range { + lower: Some(Bound { + value: b("ba"), + inclusive: true, + }), + upper: None, + }, + }), + Predicate::RegexMatch(RegexMatchPredicate { + pattern: ".*r".to_string(), + }), + ]; + let applier = KeysFstApplier::try_from(predicates).unwrap(); + assert_eq!(applier.keys, vec![b("bar")]); + } + + #[test] + fn test_keys_fst_applier_try_from_filter_out_unmatched_keys() { + let predicates = vec![ + Predicate::InList(InListPredicate { + list: HashSet::from_iter(vec![b("foo"), b("bar")]), + }), + Predicate::Range(RangePredicate { + range: Range { + lower: Some(Bound { + value: b("f"), + inclusive: true, + }), + upper: None, + }, + }), + Predicate::RegexMatch(RegexMatchPredicate { + pattern: ".*o".to_string(), + }), + ]; + let applier = KeysFstApplier::try_from(predicates).unwrap(); + assert_eq!(applier.keys, vec![b("foo")]); + } + + #[test] + fn test_keys_fst_applier_try_from_empty_predicates() { + let predicates = vec![]; + let result = KeysFstApplier::try_from(predicates); + assert!(matches!(result, Err(Error::EmptyPredicates { .. }))); + } + + #[test] + fn test_keys_fst_applier_try_from_without_in_list() { + let predicates = vec![Predicate::Range(RangePredicate { + range: Range { + lower: Some(Bound { + value: b("bar"), + inclusive: true, + }), + upper: None, + }, + })]; + let result = KeysFstApplier::try_from(predicates); + assert!(result.is_err()); + } + + #[test] + fn test_keys_fst_applier_try_from_with_invalid_regex() { + let predicates = vec![ + Predicate::InList(InListPredicate { + list: HashSet::from_iter(vec![b("foo"), b("bar")]), + }), + Predicate::RegexMatch(RegexMatchPredicate { + pattern: "*invalid regex".to_string(), + }), + ]; + let result = KeysFstApplier::try_from(predicates); + assert!(matches!(result, Err(Error::ParseRegex { .. }))); + } +} diff --git a/src/index/src/inverted_index/search/predicate.rs b/src/index/src/inverted_index/search/predicate.rs new file mode 100644 index 000000000000..25101e0ece5b --- /dev/null +++ b/src/index/src/inverted_index/search/predicate.rs @@ -0,0 +1,73 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashSet; + +use crate::inverted_index::Bytes; + +/// Enumerates types of predicates for value filtering. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Predicate { + /// Predicate for matching values in a list. + InList(InListPredicate), + + /// Predicate for matching values within a range. + Range(RangePredicate), + + /// Predicate for matching values against a regex pattern. + RegexMatch(RegexMatchPredicate), +} + +/// `InListPredicate` contains a list of acceptable values. A value needs to match at least +/// one of the elements (logical OR semantic) for the predicate to be satisfied. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct InListPredicate { + /// List of acceptable values. + pub list: HashSet, +} + +/// `Bound` is a sub-component of a range, representing a single-sided limit that could be inclusive or exclusive. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Bound { + /// Whether the bound is inclusive or exclusive. + pub inclusive: bool, + /// The value of the bound. + pub value: Bytes, +} + +/// `Range` defines a single continuous range which can optionally have a lower and/or upper limit. +/// Both the lower and upper bounds must be satisfied for the range condition to be true. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Range { + /// The lower bound of the range. + pub lower: Option, + /// The upper bound of the range. + pub upper: Option, +} + +/// `RangePredicate` encapsulates a range condition that must be satisfied +/// for the predicate to hold true (logical AND semantic between the bounds). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RangePredicate { + /// The range condition. + pub range: Range, +} + +/// `RegexMatchPredicate` encapsulates a single regex pattern. A value must match +/// the pattern for the predicate to be satisfied. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RegexMatchPredicate { + /// The regex pattern. + pub pattern: String, +} diff --git a/src/index/src/lib.rs b/src/index/src/lib.rs index efed1e963bf0..e7f448c398ef 100644 --- a/src/index/src/lib.rs +++ b/src/index/src/lib.rs @@ -12,4 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. +#![feature(iter_partition_in_place)] + pub mod inverted_index; From 8fdd24fe7e70bdf0320367fd15960a6dbc734260 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Thu, 30 Nov 2023 10:16:38 +0000 Subject: [PATCH 02/29] fix: typos Signed-off-by: Zhenchi --- src/index/src/inverted_index/search/fst_apply/keys_apply.rs | 2 +- src/mito2/src/engine/truncate_test.rs | 2 +- src/mito2/src/read/compat.rs | 2 +- src/object-store/src/layers/lru_cache/read_cache.rs | 2 +- src/promql/src/planner.rs | 4 ++-- src/servers/src/tls.rs | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/index/src/inverted_index/search/fst_apply/keys_apply.rs b/src/index/src/inverted_index/search/fst_apply/keys_apply.rs index 8484248d74f7..23503a97b195 100644 --- a/src/index/src/inverted_index/search/fst_apply/keys_apply.rs +++ b/src/index/src/inverted_index/search/fst_apply/keys_apply.rs @@ -227,7 +227,7 @@ mod tests { Predicate::Range(RangePredicate { range: Range { lower: Some(Bound { - value: b("ba"), + value: b("bar"), inclusive: true, }), upper: None, diff --git a/src/mito2/src/engine/truncate_test.rs b/src/mito2/src/engine/truncate_test.rs index 8bf2ddb7e737..79b782e8766f 100644 --- a/src/mito2/src/engine/truncate_test.rs +++ b/src/mito2/src/engine/truncate_test.rs @@ -304,7 +304,7 @@ async fn test_engine_truncate_during_flush() { let entry_id = version_data.last_entry_id; let sequence = version_data.committed_sequence; - // Flush reigon. + // Flush region. let engine_cloned = engine.clone(); let flush_task = tokio::spawn(async move { info!("do flush task!!!!"); diff --git a/src/mito2/src/read/compat.rs b/src/mito2/src/read/compat.rs index b253d183589b..1efa75e45c7d 100644 --- a/src/mito2/src/read/compat.rs +++ b/src/mito2/src/read/compat.rs @@ -177,7 +177,7 @@ fn may_compat_primary_key( CompatReaderSnafu { region_id: expect.region_id, reason: format!( - "primary key has more columns {} than exepct {}", + "primary key has more columns {} than expect {}", actual.primary_key.len(), expect.primary_key.len() ), diff --git a/src/object-store/src/layers/lru_cache/read_cache.rs b/src/object-store/src/layers/lru_cache/read_cache.rs index 5ef9f2d1fb55..61c1c8285595 100644 --- a/src/object-store/src/layers/lru_cache/read_cache.rs +++ b/src/object-store/src/layers/lru_cache/read_cache.rs @@ -114,7 +114,7 @@ impl ReadCache { (self.mem_cache.entry_count(), self.mem_cache.weighted_size()) } - /// Invalidte all cache items which key starts with `prefix`. + /// Invalidate all cache items which key starts with `prefix`. pub(crate) async fn invalidate_entries_with_prefix(&self, prefix: String) { // Safety: always ok when building cache with `support_invalidation_closures`. self.mem_cache diff --git a/src/promql/src/planner.rs b/src/promql/src/planner.rs index 7dfa17a17b3b..7bcc8dabf63f 100644 --- a/src/promql/src/planner.rs +++ b/src/promql/src/planner.rs @@ -2228,7 +2228,7 @@ mod test { "some_metric.timestamp", ], ), - // single not_eq mathcer + // single not_eq matcher ( r#"some_metric{__field__!="field_1"}"#, vec![ @@ -2240,7 +2240,7 @@ mod test { "some_metric.timestamp", ], ), - // two not_eq mathcers + // two not_eq matchers ( r#"some_metric{__field__!="field_1", __field__!="field_2"}"#, vec![ diff --git a/src/servers/src/tls.rs b/src/servers/src/tls.rs index a4e93a818193..9a5827396922 100644 --- a/src/servers/src/tls.rs +++ b/src/servers/src/tls.rs @@ -200,7 +200,7 @@ mod tests { } #[test] - fn test_tls_option_verifiy_ca() { + fn test_tls_option_verify_ca() { let s = r#" { "mode": "verify_ca", @@ -219,7 +219,7 @@ mod tests { } #[test] - fn test_tls_option_verifiy_full() { + fn test_tls_option_verify_full() { let s = r#" { "mode": "verify_full", From 738ebe29c91b53d67392f65714ac36b204496d87 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Fri, 1 Dec 2023 07:15:59 +0000 Subject: [PATCH 03/29] feat(inverted_index.search): add fst values mapper Signed-off-by: Zhenchi --- Cargo.lock | 97 +++++++++++++++ Cargo.toml | 1 + src/index/Cargo.toml | 1 + src/index/src/inverted_index/format/reader.rs | 1 + src/index/src/inverted_index/search.rs | 1 + .../search/fst_values_mapper.rs | 115 ++++++++++++++++++ 6 files changed, 216 insertions(+) create mode 100644 src/index/src/inverted_index/search/fst_values_mapper.rs diff --git a/Cargo.lock b/Cargo.lock index bb991ff45c1d..77bc436c1fb7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2797,6 +2797,12 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" +[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + [[package]] name = "digest" version = "0.10.7" @@ -2895,6 +2901,12 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "downcast" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" + [[package]] name = "dyn-clone" version = "1.0.16" @@ -3182,6 +3194,15 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "float-cmp" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +dependencies = [ + "num-traits", +] + [[package]] name = "fnv" version = "1.0.7" @@ -3206,6 +3227,12 @@ dependencies = [ "regex", ] +[[package]] +name = "fragile" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" + [[package]] name = "frontend" version = "0.4.4" @@ -3919,6 +3946,7 @@ dependencies = [ "fst", "futures", "greptime-proto", + "mockall", "prost 0.12.2", "regex", "regex-automata 0.1.10", @@ -4873,6 +4901,33 @@ dependencies = [ "uuid", ] +[[package]] +name = "mockall" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c84490118f2ee2d74570d114f3d0493cbf02790df303d2707606c3e14e07c96" +dependencies = [ + "cfg-if 1.0.0", + "downcast", + "fragile", + "lazy_static", + "mockall_derive", + "predicates", + "predicates-tree", +] + +[[package]] +name = "mockall_derive" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ce75669015c4f47b289fd4d4f56e894e4c96003ffdf3ac51313126f94c6cbb" +dependencies = [ + "cfg-if 1.0.0", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "moka" version = "0.12.1" @@ -5101,6 +5156,12 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "normalize-line-endings" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -6225,6 +6286,36 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +[[package]] +name = "predicates" +version = "2.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59230a63c37f3e18569bdb90e4a89cbf5bf8b06fea0b84e65ea10cc4df47addd" +dependencies = [ + "difflib", + "float-cmp", + "itertools 0.10.5", + "normalize-line-endings", + "predicates-core", + "regex", +] + +[[package]] +name = "predicates-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b794032607612e7abeb4db69adb4e33590fa6cf1149e95fd7cb00e634b92f174" + +[[package]] +name = "predicates-tree" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368ba315fb8c5052ab692e68a0eefec6ec57b23a36959c14496f0b0df2c0cecf" +dependencies = [ + "predicates-core", + "termtree", +] + [[package]] name = "prettydiff" version = "0.6.4" @@ -9195,6 +9286,12 @@ dependencies = [ "libc", ] +[[package]] +name = "termtree" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" + [[package]] name = "tests-integration" version = "0.4.4" diff --git a/Cargo.toml b/Cargo.toml index 75e0ad565580..5e7839dabba0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -92,6 +92,7 @@ humantime-serde = "1.1" itertools = "0.10" lazy_static = "1.4" meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "abbd357c1e193cd270ea65ee7652334a150b628f" } +mockall = "0.11.4" moka = "0.12" once_cell = "1.18" opentelemetry-proto = { git = "https://github.com/waynexia/opentelemetry-rust.git", rev = "33841b38dda79b15f2024952be5f32533325ca02", features = [ diff --git a/src/index/Cargo.toml b/src/index/Cargo.toml index 54b8c75f7edc..e19240ae286e 100644 --- a/src/index/Cargo.toml +++ b/src/index/Cargo.toml @@ -12,6 +12,7 @@ common-macro.workspace = true fst.workspace = true futures.workspace = true greptime-proto.workspace = true +mockall.workspace = true prost.workspace = true regex-automata.workspace = true regex.workspace = true diff --git a/src/index/src/inverted_index/format/reader.rs b/src/index/src/inverted_index/format/reader.rs index c18c6bd83eb7..e8e9c72c9579 100644 --- a/src/index/src/inverted_index/format/reader.rs +++ b/src/index/src/inverted_index/format/reader.rs @@ -23,6 +23,7 @@ use crate::inverted_index::error::Result; use crate::inverted_index::FstMap; /// InvertedIndexReader defines an asynchronous reader of inverted index data +#[mockall::automock] #[async_trait] pub trait InvertedIndexReader { /// Retrieve metadata of all inverted indices stored within the blob. diff --git a/src/index/src/inverted_index/search.rs b/src/index/src/inverted_index/search.rs index d4f4b71f05c9..236c5e6c6f1c 100644 --- a/src/index/src/inverted_index/search.rs +++ b/src/index/src/inverted_index/search.rs @@ -14,3 +14,4 @@ pub mod fst_apply; pub mod predicate; +pub mod fst_values_mapper; diff --git a/src/index/src/inverted_index/search/fst_values_mapper.rs b/src/index/src/inverted_index/search/fst_values_mapper.rs new file mode 100644 index 000000000000..6b20571e5388 --- /dev/null +++ b/src/index/src/inverted_index/search/fst_values_mapper.rs @@ -0,0 +1,115 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_base::BitVec; +use greptime_proto::v1::index::InvertedIndexMeta; + +use crate::inverted_index::error::Result; +use crate::inverted_index::format::reader::InvertedIndexReader; + +/// `FstValuesMapper` maps FST-encoded u64 values to their corresponding bitmaps +/// within an inverted index. The higher 32 bits of each u64 value represent the +/// bitmap offset and the lower 32 bits represent its size. This mapper uses these +/// combined offset-size pairs to fetch and union multiple bitmaps into a single `BitVec`. +pub struct FstValuesMapper<'a> { + /// `reader` retrieves bitmap data using offsets and sizes from FST values. + reader: &'a mut dyn InvertedIndexReader, + + /// `metadata` provides context for interpreting the index structures. + metadata: &'a InvertedIndexMeta, +} + +impl<'a> FstValuesMapper<'a> { + pub fn new( + reader: &'a mut dyn InvertedIndexReader, + metadata: &'a InvertedIndexMeta, + ) -> FstValuesMapper<'a> { + FstValuesMapper { reader, metadata } + } + + /// Maps an array of FST values to a `BitVec` by retrieving and combining bitmaps. + pub async fn map_values(&mut self, values: &[u64]) -> Result { + let mut bitmap = BitVec::new(); + + for value in values { + // relative_offset (higher 32 bits), size (lower 32 bits) + let relative_offset = (value >> 32) as u32; + let size = *value as u32; + + let bm = self + .reader + .bitmap(self.metadata, relative_offset, size) + .await?; + + // Ensure the longest BitVec is the left operand to prevent truncation during OR. + bitmap = if bm.len() > bitmap.len() { + bm | bitmap + } else { + bitmap | bm + }; + } + + Ok(bitmap) + } +} + +#[cfg(test)] +mod tests { + use common_base::bit_vec::prelude::*; + + use super::*; + use crate::inverted_index::format::reader::MockInvertedIndexReader; + + #[tokio::test] + async fn test_map_values() { + let mut mock_reader = MockInvertedIndexReader::new(); + + mock_reader + .expect_bitmap() + .withf(|meta, offset, size| { + meta == &InvertedIndexMeta::default() && *offset == 1 && *size == 1 + }) + .returning(|_, _, _| Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1])); + mock_reader + .expect_bitmap() + .withf(|meta, offset, size| { + meta == &InvertedIndexMeta::default() && *offset == 2 && *size == 1 + }) + .returning(|_, _, _| Ok(bitvec![u8, Lsb0; 0, 1, 0, 1, 0, 1, 0, 1])); + + let meta = InvertedIndexMeta::default(); + let mut values_mapper = FstValuesMapper::new(&mut mock_reader, &meta); + + let result = values_mapper.map_values(&[]).await.unwrap(); + assert_eq!(result.count_ones(), 0); + + let result = values_mapper.map_values(&[(1 << 32) | 1]).await.unwrap(); + assert_eq!(result, bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1]); + + let result = values_mapper.map_values(&[(2 << 32) | 1]).await.unwrap(); + assert_eq!(result, bitvec![u8, Lsb0; 0, 1, 0, 1, 0, 1, 0, 1]); + + let result = values_mapper + .map_values(&[(1 << 32) | 1, (2 << 32) | 1]) + .await + .unwrap(); + assert_eq!(result, bitvec![u8, Lsb0; 1, 1, 1, 1, 1, 1, 1, 1]); + + let result = values_mapper + .map_values(&[(2 << 32) | 1, (1 << 32) | 1]) + .await + .unwrap(); + assert_eq!(result, bitvec![u8, Lsb0; 1, 1, 1, 1, 1, 1, 1, 1]); + } +} From cfbdf6ba850b6fabc6e5862e3f16f74aa9690d3e Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Fri, 1 Dec 2023 07:19:09 +0000 Subject: [PATCH 04/29] chore: remove meta check Signed-off-by: Zhenchi --- src/index/src/inverted_index/search/fst_values_mapper.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/index/src/inverted_index/search/fst_values_mapper.rs b/src/index/src/inverted_index/search/fst_values_mapper.rs index 6b20571e5388..bd17d75a0286 100644 --- a/src/index/src/inverted_index/search/fst_values_mapper.rs +++ b/src/index/src/inverted_index/search/fst_values_mapper.rs @@ -77,15 +77,11 @@ mod tests { mock_reader .expect_bitmap() - .withf(|meta, offset, size| { - meta == &InvertedIndexMeta::default() && *offset == 1 && *size == 1 - }) + .withf(|_meta, offset, size| *offset == 1 && *size == 1) .returning(|_, _, _| Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1])); mock_reader .expect_bitmap() - .withf(|meta, offset, size| { - meta == &InvertedIndexMeta::default() && *offset == 2 && *size == 1 - }) + .withf(|_meta, offset, size| *offset == 2 && *size == 1) .returning(|_, _, _| Ok(bitvec![u8, Lsb0; 0, 1, 0, 1, 0, 1, 0, 1])); let meta = InvertedIndexMeta::default(); From 1cc4b2cd4efa6aee1d6749c9a0084a569bb1f031 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Fri, 1 Dec 2023 07:53:50 +0000 Subject: [PATCH 05/29] fix: fmt & clippy Signed-off-by: Zhenchi --- src/index/src/inverted_index/search.rs | 2 +- src/index/src/inverted_index/search/fst_values_mapper.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/index/src/inverted_index/search.rs b/src/index/src/inverted_index/search.rs index 236c5e6c6f1c..e4ab3d5c3b70 100644 --- a/src/index/src/inverted_index/search.rs +++ b/src/index/src/inverted_index/search.rs @@ -13,5 +13,5 @@ // limitations under the License. pub mod fst_apply; -pub mod predicate; pub mod fst_values_mapper; +pub mod predicate; diff --git a/src/index/src/inverted_index/search/fst_values_mapper.rs b/src/index/src/inverted_index/search/fst_values_mapper.rs index bd17d75a0286..811fbb5641b3 100644 --- a/src/index/src/inverted_index/search/fst_values_mapper.rs +++ b/src/index/src/inverted_index/search/fst_values_mapper.rs @@ -53,11 +53,11 @@ impl<'a> FstValuesMapper<'a> { .await?; // Ensure the longest BitVec is the left operand to prevent truncation during OR. - bitmap = if bm.len() > bitmap.len() { - bm | bitmap + if bm.len() > bitmap.len() { + bitmap = bm | bitmap } else { - bitmap | bm - }; + bitmap |= bm + } } Ok(bitmap) From 5bc2f9ebd4ab3c647f233f30fd18c6b43187d751 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Fri, 1 Dec 2023 09:52:30 +0000 Subject: [PATCH 06/29] refactor: one expect for test Signed-off-by: Zhenchi --- .../src/inverted_index/search/fst_values_mapper.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/index/src/inverted_index/search/fst_values_mapper.rs b/src/index/src/inverted_index/search/fst_values_mapper.rs index 811fbb5641b3..a881b5683c61 100644 --- a/src/index/src/inverted_index/search/fst_values_mapper.rs +++ b/src/index/src/inverted_index/search/fst_values_mapper.rs @@ -74,15 +74,13 @@ mod tests { #[tokio::test] async fn test_map_values() { let mut mock_reader = MockInvertedIndexReader::new(); - - mock_reader - .expect_bitmap() - .withf(|_meta, offset, size| *offset == 1 && *size == 1) - .returning(|_, _, _| Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1])); mock_reader .expect_bitmap() - .withf(|_meta, offset, size| *offset == 2 && *size == 1) - .returning(|_, _, _| Ok(bitvec![u8, Lsb0; 0, 1, 0, 1, 0, 1, 0, 1])); + .returning(|_, offset, size| match (offset, size) { + (1, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1]), + (2, 1) => Ok(bitvec![u8, Lsb0; 0, 1, 0, 1, 0, 1, 0, 1]), + _ => unreachable!(), + }); let meta = InvertedIndexMeta::default(); let mut values_mapper = FstValuesMapper::new(&mut mock_reader, &meta); From fb57bafe54ff37728a795ddd016e81a48e4847ef Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Fri, 1 Dec 2023 11:01:43 +0000 Subject: [PATCH 07/29] feat(inverted_index.search): add index applier Signed-off-by: Zhenchi --- Cargo.lock | 2 +- Cargo.toml | 3 + src/index/src/inverted_index/error.rs | 9 +- src/index/src/inverted_index/format/reader.rs | 2 +- .../inverted_index/format/reader/footer.rs | 4 - src/index/src/inverted_index/search.rs | 1 + .../src/inverted_index/search/fst_apply.rs | 1 + .../src/inverted_index/search/index_apply.rs | 32 ++ .../search/index_apply/predicates_apply.rs | 292 ++++++++++++++++++ 9 files changed, 339 insertions(+), 7 deletions(-) create mode 100644 src/index/src/inverted_index/search/index_apply.rs create mode 100644 src/index/src/inverted_index/search/index_apply/predicates_apply.rs diff --git a/Cargo.lock b/Cargo.lock index 77bc436c1fb7..ca3cd1b73129 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3570,7 +3570,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "greptime-proto" version = "0.1.0" -source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=2b3ae45740a49ec6a0830d71fc09c3093aeb5fe7#2b3ae45740a49ec6a0830d71fc09c3093aeb5fe7" +source = "git+https://github.com/zhongzc/greptime-proto.git?branch=zhongzc/index-add-row-count#6ff29913b75179b0f212eb2bf53cece6229f1aaa" dependencies = [ "prost 0.12.2", "serde", diff --git a/Cargo.toml b/Cargo.toml index 5e7839dabba0..fcc5b08b6b8f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -196,3 +196,6 @@ strip = true lto = "thin" debug = false incremental = false + +[patch.'https://github.com/GreptimeTeam/greptime-proto.git'] +greptime-proto = { git = "https://github.com/zhongzc/greptime-proto.git", branch = "zhongzc/index-add-row-count" } diff --git a/src/index/src/inverted_index/error.rs b/src/index/src/inverted_index/error.rs index 65658c28709f..92663859ede7 100644 --- a/src/index/src/inverted_index/error.rs +++ b/src/index/src/inverted_index/error.rs @@ -109,6 +109,12 @@ pub enum Error { location: Location, predicates: Vec, }, + + #[snafu(display("Tag not found in index, tag_name: {tag_name}"))] + TagNotFoundInIndex { + tag_name: String, + location: Location, + }, } impl ErrorExt for Error { @@ -128,7 +134,8 @@ impl ErrorExt for Error { | ParseDFA { .. } | KeysApplierWithoutInList { .. } | IntersectionApplierWithInList { .. } - | EmptyPredicates { .. } => StatusCode::InvalidArguments, + | EmptyPredicates { .. } + | TagNotFoundInIndex { .. } => StatusCode::InvalidArguments, } } diff --git a/src/index/src/inverted_index/format/reader.rs b/src/index/src/inverted_index/format/reader.rs index e8e9c72c9579..705f4b409844 100644 --- a/src/index/src/inverted_index/format/reader.rs +++ b/src/index/src/inverted_index/format/reader.rs @@ -25,7 +25,7 @@ use crate::inverted_index::FstMap; /// InvertedIndexReader defines an asynchronous reader of inverted index data #[mockall::automock] #[async_trait] -pub trait InvertedIndexReader { +pub trait InvertedIndexReader: Send { /// Retrieve metadata of all inverted indices stored within the blob. async fn metadata(&mut self) -> Result; diff --git a/src/index/src/inverted_index/format/reader/footer.rs b/src/index/src/inverted_index/format/reader/footer.rs index 77cc61d7c21e..6163607f0d1c 100644 --- a/src/index/src/inverted_index/format/reader/footer.rs +++ b/src/index/src/inverted_index/format/reader/footer.rs @@ -131,7 +131,6 @@ mod tests { async fn test_read_payload() { let meta = InvertedIndexMeta { name: "test".to_string(), - segment_row_count: 4096, ..Default::default() }; @@ -145,14 +144,12 @@ mod tests { assert_eq!(metas.metas.len(), 1); let index_meta = &metas.metas.get("test").unwrap(); assert_eq!(index_meta.name, "test"); - assert_eq!(index_meta.segment_row_count, 4096); } #[tokio::test] async fn test_invalid_footer_payload_size() { let meta = InvertedIndexMeta { name: "test".to_string(), - segment_row_count: 4096, ..Default::default() }; @@ -171,7 +168,6 @@ mod tests { name: "test".to_string(), base_offset: 0, inverted_index_size: 1, // Set size to 1 to make ecceed the blob size - segment_row_count: 4096, ..Default::default() }; diff --git a/src/index/src/inverted_index/search.rs b/src/index/src/inverted_index/search.rs index e4ab3d5c3b70..8e28440c7e28 100644 --- a/src/index/src/inverted_index/search.rs +++ b/src/index/src/inverted_index/search.rs @@ -14,4 +14,5 @@ pub mod fst_apply; pub mod fst_values_mapper; +pub mod index_apply; pub mod predicate; diff --git a/src/index/src/inverted_index/search/fst_apply.rs b/src/index/src/inverted_index/search/fst_apply.rs index 32b71b2bf3d9..9f54d0d88918 100644 --- a/src/index/src/inverted_index/search/fst_apply.rs +++ b/src/index/src/inverted_index/search/fst_apply.rs @@ -22,6 +22,7 @@ use crate::inverted_index::FstMap; /// A trait for objects that can process a finite state transducer (FstMap) and return /// associated values. +#[mockall::automock] pub trait FstApplier: Send + Sync { /// Retrieves values from an FstMap. /// diff --git a/src/index/src/inverted_index/search/index_apply.rs b/src/index/src/inverted_index/search/index_apply.rs new file mode 100644 index 000000000000..24ce2b0e92bc --- /dev/null +++ b/src/index/src/inverted_index/search/index_apply.rs @@ -0,0 +1,32 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod predicates_apply; + +use async_trait::async_trait; +pub use predicates_apply::PredicatesIndexApplier; + +use crate::inverted_index::error::Result; +use crate::inverted_index::format::reader::InvertedIndexReader; + +/// A trait for processing and transforming indices obtained from an inverted index. +/// +/// Applier instances are reusable and work with various `InvertedIndexReader` instances, +/// avoiding repeated compilation of fixed predicates such as regex patterns. +#[async_trait] +pub trait IndexApplier { + /// Applies the predefined predicates to the data read by the given index reader, returning + /// a list of relevant indices (e.g., post IDs, group IDs, row IDs). + async fn apply(&self, reader: &mut dyn InvertedIndexReader) -> Result>; +} diff --git a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs new file mode 100644 index 000000000000..a830f28c47ce --- /dev/null +++ b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs @@ -0,0 +1,292 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use async_trait::async_trait; +use common_base::BitVec; +use greptime_proto::v1::index::InvertedIndexMetas; +use snafu::OptionExt; + +use crate::inverted_index::error::{Result, TagNotFoundInIndexSnafu}; +use crate::inverted_index::format::reader::InvertedIndexReader; +use crate::inverted_index::search::fst_apply::{ + FstApplier, IntersectionFstApplier, KeysFstApplier, +}; +use crate::inverted_index::search::fst_values_mapper::FstValuesMapper; +use crate::inverted_index::search::index_apply::IndexApplier; +use crate::inverted_index::search::predicate::Predicate; + +/// `PredicatesIndexApplier` contains a collection of `FstApplier`s, each associated with a tag field, +/// to process and filter index data based on compiled predicates. +pub struct PredicatesIndexApplier { + /// A list of `FstApplier`s, each associated with a tag field. + fst_appliers: Vec<(String, Box)>, +} + +#[async_trait] +impl IndexApplier for PredicatesIndexApplier { + /// Applies all `FstApplier`s to the data in the index, intersecting the individual + /// bitmaps obtained for each tag to result in a final set of indices. + async fn apply(&self, reader: &mut dyn InvertedIndexReader) -> Result> { + let metadata = reader.metadata().await?; + + let mut bitmap = Self::bitmap_full(&metadata); // scan all + for (tag_name, fst_applier) in &self.fst_appliers { + if bitmap.count_ones() == 0 { + break; + } + + // TODO(zhongzc): how to handle missing columns? + let meta = metadata + .metas + .get(tag_name) + .with_context(|| TagNotFoundInIndexSnafu { + tag_name: tag_name.to_owned(), + })?; + + let fst = reader.fst(meta).await?; + let values = fst_applier.apply(&fst); + + let mut mapper = FstValuesMapper::new(&mut *reader, meta); + let bm = mapper.map_values(&values).await?; + + bitmap &= bm; + } + + Ok(bitmap.iter_ones().collect()) + } +} + +impl PredicatesIndexApplier { + /// Constructs an instance of `PredicatesIndexApplier` based on a list of tag predicates. + /// Chooses an appropriate `FstApplier` for each tag based on the nature of its predicates. + pub fn try_from(predicates: Vec<(String, Vec)>) -> Result { + let mut fst_appliers = Vec::with_capacity(predicates.len()); + + for (tag_name, predicates) in predicates { + if predicates.is_empty() { + continue; + } + + let exists_in_list = predicates.iter().any(|p| matches!(p, Predicate::InList(_))); + let fst_applier = if exists_in_list { + Box::new(KeysFstApplier::try_from(predicates)?) as _ + } else { + Box::new(IntersectionFstApplier::try_from(predicates)?) as _ + }; + + fst_appliers.push((tag_name, fst_applier)); + } + + Ok(PredicatesIndexApplier { fst_appliers }) + } + + /// Creates a BitVec representing the full range of data in the index for initial scanning. + fn bitmap_full(metadata: &InvertedIndexMetas) -> BitVec { + let total_count = metadata.total_row_count; + let segment_count = metadata.segment_row_count; + let len = (total_count + segment_count - 1) / segment_count; + BitVec::repeat(true, len as _) + } +} + +impl TryFrom)>> for PredicatesIndexApplier { + type Error = crate::inverted_index::error::Error; + fn try_from(predicates: Vec<(String, Vec)>) -> Result { + Self::try_from(predicates) + } +} + +#[cfg(test)] +mod tests { + use common_base::bit_vec::prelude::*; + use greptime_proto::v1::index::InvertedIndexMeta; + + use super::*; + use crate::inverted_index::error::Error; + use crate::inverted_index::format::reader::MockInvertedIndexReader; + use crate::inverted_index::search::fst_apply::MockFstApplier; + use crate::inverted_index::FstMap; + + fn mock_metas<'a>(tags: impl IntoIterator) -> InvertedIndexMetas { + let mut metas = InvertedIndexMetas { + total_row_count: 8, + segment_row_count: 1, + ..Default::default() + }; + for tag in tags.into_iter() { + let meta = InvertedIndexMeta { + name: "".to_owned(), + ..Default::default() + }; + metas.metas.insert(tag.into(), meta); + } + metas + } + + #[tokio::test] + async fn test_index_applier_apply_point_get() { + // A point get fst applier + let mut mock_fst_applier = MockFstApplier::new(); + mock_fst_applier + .expect_apply() + .returning(|fst| fst.get("tag0_value0").into_iter().collect()); + + // A index applier that applies point get "tag0_value0" to tag "tag0" + let applier = PredicatesIndexApplier { + fst_appliers: vec![("tag0".to_owned(), Box::new(mock_fst_applier))], + }; + + // index reader with a single tag "tag0" and a single value "tag0_value0" + let mut mock_reader = MockInvertedIndexReader::new(); + mock_reader + .expect_metadata() + .returning(|| Ok(mock_metas(["tag0"]))); + mock_reader + .expect_fst() + .returning(|meta| match meta.name.as_str() { + "tag0" => Ok(FstMap::from_iter([(b"tag0_value0", (2 << 32) | 1)]).unwrap()), + _ => unreachable!(), + }); + mock_reader.expect_bitmap().returning(|meta, offset, size| { + match (meta.name.as_str(), offset, size) { + ("tag0", 2, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1, 0]), + _ => unreachable!(), + } + }); + let indices = applier.apply(&mut mock_reader).await.unwrap(); + assert_eq!(indices, vec![0, 2, 4, 6]); + + // index reader with a single tag "tag0" and without value "tag0_value0" + let mut mock_reader = MockInvertedIndexReader::new(); + mock_reader + .expect_metadata() + .returning(|| Ok(mock_metas(["tag0"]))); + mock_reader + .expect_fst() + .returning(|meta| match meta.name.as_str() { + "tag0" => Ok(FstMap::from_iter([(b"tag0_value1", (2 << 32) | 1)]).unwrap()), + _ => unreachable!(), + }); + let indices = applier.apply(&mut mock_reader).await.unwrap(); + assert!(indices.is_empty()); + + // // index reader without tag "tag0" + // let mut mock_reader = MockInvertedIndexReader::new(); + // mock_reader + // .expect_metadata() + // .returning(|| Ok(mock_metas(["tag1"]))); + // let result = applier.apply(&mut mock_reader).await; + // assert!(matches!(result, Err(Error::TagNotFoundInIndex { .. }))); + } + + #[tokio::test] + async fn test_index_applier_apply_intersection_with_two_tags() { + // A point get fst applier for tag "tag0" + let mut mock_fst_applier = MockFstApplier::new(); + mock_fst_applier + .expect_apply() + .returning(|fst| fst.get("tag0_value0").into_iter().collect()); + // Another point get fst applier for tag "tag1" + let mut mock_fst_applier2 = MockFstApplier::new(); + mock_fst_applier2 + .expect_apply() + .returning(|fst| fst.get("tag1_value0").into_iter().collect()); + + // A index applier that intersects point get "tag0_value0" and "tag1_value0" + let applier = PredicatesIndexApplier { + fst_appliers: vec![ + ("tag0".to_owned(), Box::new(mock_fst_applier)), + ("tag1".to_owned(), Box::new(mock_fst_applier2)), + ], + }; + + // index reader with two tags "tag0" and "tag1" and a single value "tag0_value0" and "tag1_value0" + let mut mock_reader = MockInvertedIndexReader::new(); + mock_reader + .expect_metadata() + .returning(|| Ok(mock_metas(["tag0", "tag1"]))); + mock_reader + .expect_fst() + .returning(|meta| match meta.name.as_str() { + "tag0" => Ok(FstMap::from_iter([(b"tag0_value0", (1 << 32) | 1)]).unwrap()), + "tag1" => Ok(FstMap::from_iter([(b"tag1_value0", (2 << 32) | 1)]).unwrap()), + _ => unreachable!(), + }); + mock_reader.expect_bitmap().returning(|meta, offset, size| { + match (meta.name.as_str(), offset, size) { + ("tag0", 1, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1, 0]), + ("tag1", 2, 1) => Ok(bitvec![u8, Lsb0; 1, 1, 0, 1, 1, 0, 1, 1]), + _ => unreachable!(), + } + }); + + let indices = applier.apply(&mut mock_reader).await.unwrap(); + assert_eq!(indices, vec![0, 4, 6]); + } + + #[tokio::test] + async fn test_index_applier_without_predicates() { + let applier = PredicatesIndexApplier { + fst_appliers: vec![], + }; + + let mut mock_reader: MockInvertedIndexReader = MockInvertedIndexReader::new(); + mock_reader + .expect_metadata() + .returning(|| Ok(mock_metas(["tag0"]))); + + let indices = applier.apply(&mut mock_reader).await.unwrap(); + assert_eq!(indices, vec![0, 1, 2, 3, 4, 5, 6, 7]); // scan all + } + + #[tokio::test] + async fn test_index_applier_with_empty_index() { + let mut mock_reader = MockInvertedIndexReader::new(); + mock_reader.expect_metadata().returning(move || { + Ok(InvertedIndexMetas { + total_row_count: 0, // No rows + segment_row_count: 1, + ..Default::default() + }) + }); + + let mut mock_fst_applier = MockFstApplier::new(); + mock_fst_applier.expect_apply().never(); + + let applier = PredicatesIndexApplier { + fst_appliers: vec![("tag0".to_owned(), Box::new(mock_fst_applier))], + }; + + let indices = applier.apply(&mut mock_reader).await.unwrap(); + assert!(indices.is_empty()); + } + + #[tokio::test] + async fn test_index_applier_with_nonexistent_tag() { + let mut mock_reader = MockInvertedIndexReader::new(); + mock_reader + .expect_metadata() + .returning(|| Ok(mock_metas(vec![]))); + + let mut mock_fst_applier = MockFstApplier::new(); + mock_fst_applier.expect_apply().never(); + + let applier = PredicatesIndexApplier { + fst_appliers: vec![("tag0".to_owned(), Box::new(mock_fst_applier))], + }; + + let result = applier.apply(&mut mock_reader).await; + assert!(matches!(result, Err(Error::TagNotFoundInIndex { .. }))); + } +} From 65efce377814c22271a398fce371a609ebb8f2b8 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Fri, 1 Dec 2023 11:14:48 +0000 Subject: [PATCH 08/29] refactor: bitmap_full -> bitmap_full_range Signed-off-by: Zhenchi --- .../inverted_index/search/index_apply/predicates_apply.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs index a830f28c47ce..b63613ae2f73 100644 --- a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs +++ b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs @@ -40,7 +40,7 @@ impl IndexApplier for PredicatesIndexApplier { async fn apply(&self, reader: &mut dyn InvertedIndexReader) -> Result> { let metadata = reader.metadata().await?; - let mut bitmap = Self::bitmap_full(&metadata); // scan all + let mut bitmap = Self::bitmap_full_range(&metadata); for (tag_name, fst_applier) in &self.fst_appliers { if bitmap.count_ones() == 0 { break; @@ -91,8 +91,8 @@ impl PredicatesIndexApplier { Ok(PredicatesIndexApplier { fst_appliers }) } - /// Creates a BitVec representing the full range of data in the index for initial scanning. - fn bitmap_full(metadata: &InvertedIndexMetas) -> BitVec { + /// Creates a `BitVec` representing the full range of data in the index for initial scanning. + fn bitmap_full_range(metadata: &InvertedIndexMetas) -> BitVec { let total_count = metadata.total_row_count; let segment_count = metadata.segment_row_count; let len = (total_count + segment_count - 1) / segment_count; From 98e1a401141e40b8ab3aefb1a480890975cad9d7 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Sat, 2 Dec 2023 10:27:20 +0000 Subject: [PATCH 09/29] feat: add check for segment_row_count Signed-off-by: Zhenchi --- src/index/src/inverted_index/error.rs | 4 ++++ src/index/src/inverted_index/format/reader/footer.rs | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/index/src/inverted_index/error.rs b/src/index/src/inverted_index/error.rs index 92663859ede7..24eaed0774e9 100644 --- a/src/index/src/inverted_index/error.rs +++ b/src/index/src/inverted_index/error.rs @@ -64,6 +64,9 @@ pub enum Error { payload_size: u64, }, + #[snafu(display("Unexpected zero segment row count"))] + UnexpectedZeroSegmentRowCount { location: Location }, + #[snafu(display("Failed to decode fst"))] DecodeFst { #[snafu(source)] @@ -124,6 +127,7 @@ impl ErrorExt for Error { Seek { .. } | Read { .. } | UnexpectedFooterPayloadSize { .. } + | UnexpectedZeroSegmentRowCount { .. } | UnexpectedOffsetSize { .. } | UnexpectedBlobSize { .. } | DecodeProto { .. } diff --git a/src/index/src/inverted_index/format/reader/footer.rs b/src/index/src/inverted_index/format/reader/footer.rs index 6163607f0d1c..2e9039a28263 100644 --- a/src/index/src/inverted_index/format/reader/footer.rs +++ b/src/index/src/inverted_index/format/reader/footer.rs @@ -21,7 +21,7 @@ use snafu::{ensure, ResultExt}; use crate::inverted_index::error::{ DecodeProtoSnafu, ReadSnafu, Result, SeekSnafu, UnexpectedFooterPayloadSizeSnafu, - UnexpectedOffsetSizeSnafu, + UnexpectedOffsetSizeSnafu, UnexpectedZeroSegmentRowCountSnafu, }; use crate::inverted_index::format::FOOTER_PAYLOAD_SIZE_SIZE; @@ -85,6 +85,11 @@ impl InvertedIndeFooterReader { /// Check if the read metadata is consistent with expected sizes and offsets. fn validate_metas(&self, metas: &InvertedIndexMetas, payload_size: u64) -> Result<()> { + ensure!( + metas.segment_row_count > 0, + UnexpectedZeroSegmentRowCountSnafu + ); + for meta in metas.metas.values() { let InvertedIndexMeta { base_offset, From 654aa9896056d8db28d7c0a0f4b9b32826b8d574 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Sat, 2 Dec 2023 10:28:36 +0000 Subject: [PATCH 10/29] fix: remove redundant code Signed-off-by: Zhenchi --- .../inverted_index/search/index_apply/predicates_apply.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs index b63613ae2f73..356fb0fa990e 100644 --- a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs +++ b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs @@ -180,14 +180,6 @@ mod tests { }); let indices = applier.apply(&mut mock_reader).await.unwrap(); assert!(indices.is_empty()); - - // // index reader without tag "tag0" - // let mut mock_reader = MockInvertedIndexReader::new(); - // mock_reader - // .expect_metadata() - // .returning(|| Ok(mock_metas(["tag1"]))); - // let result = applier.apply(&mut mock_reader).await; - // assert!(matches!(result, Err(Error::TagNotFoundInIndex { .. }))); } #[tokio::test] From c906353295015037c131e15c3acc96c63f8c507d Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Mon, 4 Dec 2023 06:42:05 +0000 Subject: [PATCH 11/29] fix: reader test Signed-off-by: Zhenchi --- src/index/src/inverted_index/format/reader/blob.rs | 6 +++++- src/index/src/inverted_index/format/reader/footer.rs | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/index/src/inverted_index/format/reader/blob.rs b/src/index/src/inverted_index/format/reader/blob.rs index 40509e46bafb..ba8231194055 100644 --- a/src/index/src/inverted_index/format/reader/blob.rs +++ b/src/index/src/inverted_index/format/reader/blob.rs @@ -143,7 +143,11 @@ mod tests { }; // metas - let mut metas = InvertedIndexMetas::default(); + let mut metas = InvertedIndexMetas { + total_row_count: 10, + segment_row_count: 1, + ..Default::default() + }; metas.metas.insert(meta.name.clone(), meta); metas.metas.insert(meta1.name.clone(), meta1); let mut meta_buf = Vec::new(); diff --git a/src/index/src/inverted_index/format/reader/footer.rs b/src/index/src/inverted_index/format/reader/footer.rs index 2e9039a28263..478352ee685a 100644 --- a/src/index/src/inverted_index/format/reader/footer.rs +++ b/src/index/src/inverted_index/format/reader/footer.rs @@ -121,7 +121,10 @@ mod tests { use super::*; fn create_test_payload(meta: InvertedIndexMeta) -> Vec { - let mut metas = InvertedIndexMetas::default(); + let mut metas = InvertedIndexMetas { + segment_row_count: 1, + ..Default::default() + }; metas.metas.insert("test".to_string(), meta); let mut payload_buf = vec![]; From 40534ab95a57302d60bc9ec604c55de461982799 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Mon, 4 Dec 2023 09:37:51 +0000 Subject: [PATCH 12/29] chore: match error in test Signed-off-by: Zhenchi --- src/index/src/inverted_index/search/fst_apply/keys_apply.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/index/src/inverted_index/search/fst_apply/keys_apply.rs b/src/index/src/inverted_index/search/fst_apply/keys_apply.rs index 23503a97b195..c3a9fb312266 100644 --- a/src/index/src/inverted_index/search/fst_apply/keys_apply.rs +++ b/src/index/src/inverted_index/search/fst_apply/keys_apply.rs @@ -283,7 +283,7 @@ mod tests { }, })]; let result = KeysFstApplier::try_from(predicates); - assert!(result.is_err()); + assert!(matches!(result, Err(Error::KeysApplierWithoutInList { .. }))); } #[test] From eef919f6976863ca1f2a451e162f60e47ba5a0c9 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Mon, 4 Dec 2023 09:40:58 +0000 Subject: [PATCH 13/29] fix: fmt Signed-off-by: Zhenchi --- src/index/src/inverted_index/search/fst_apply/keys_apply.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/index/src/inverted_index/search/fst_apply/keys_apply.rs b/src/index/src/inverted_index/search/fst_apply/keys_apply.rs index c3a9fb312266..a5ed84dce7ce 100644 --- a/src/index/src/inverted_index/search/fst_apply/keys_apply.rs +++ b/src/index/src/inverted_index/search/fst_apply/keys_apply.rs @@ -283,7 +283,10 @@ mod tests { }, })]; let result = KeysFstApplier::try_from(predicates); - assert!(matches!(result, Err(Error::KeysApplierWithoutInList { .. }))); + assert!(matches!( + result, + Err(Error::KeysApplierWithoutInList { .. }) + )); } #[test] From 08fd331e7be8416510cdba037c494d21d575db42 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Mon, 4 Dec 2023 09:44:21 +0000 Subject: [PATCH 14/29] refactor: add helper function to construct fst value Signed-off-by: Zhenchi --- .../src/inverted_index/search/fst_values_mapper.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/index/src/inverted_index/search/fst_values_mapper.rs b/src/index/src/inverted_index/search/fst_values_mapper.rs index a881b5683c61..cc46e48e3e08 100644 --- a/src/index/src/inverted_index/search/fst_values_mapper.rs +++ b/src/index/src/inverted_index/search/fst_values_mapper.rs @@ -71,6 +71,10 @@ mod tests { use super::*; use crate::inverted_index::format::reader::MockInvertedIndexReader; + fn value(offset: u32, size: u32) -> u64 { + ((offset as u64) << 32) | (size as u64) + } + #[tokio::test] async fn test_map_values() { let mut mock_reader = MockInvertedIndexReader::new(); @@ -88,20 +92,20 @@ mod tests { let result = values_mapper.map_values(&[]).await.unwrap(); assert_eq!(result.count_ones(), 0); - let result = values_mapper.map_values(&[(1 << 32) | 1]).await.unwrap(); + let result = values_mapper.map_values(&[value(1, 1)]).await.unwrap(); assert_eq!(result, bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1]); - let result = values_mapper.map_values(&[(2 << 32) | 1]).await.unwrap(); + let result = values_mapper.map_values(&[value(2, 1)]).await.unwrap(); assert_eq!(result, bitvec![u8, Lsb0; 0, 1, 0, 1, 0, 1, 0, 1]); let result = values_mapper - .map_values(&[(1 << 32) | 1, (2 << 32) | 1]) + .map_values(&[value(1, 1), value(2, 1)]) .await .unwrap(); assert_eq!(result, bitvec![u8, Lsb0; 1, 1, 1, 1, 1, 1, 1, 1]); let result = values_mapper - .map_values(&[(2 << 32) | 1, (1 << 32) | 1]) + .map_values(&[value(2, 1), value(1, 1)]) .await .unwrap(); assert_eq!(result, bitvec![u8, Lsb0; 1, 1, 1, 1, 1, 1, 1, 1]); From 52b3ae45c9e5a118d80fc97498ffd13961fd81ae Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Mon, 4 Dec 2023 12:27:09 +0000 Subject: [PATCH 15/29] refactor: polish unit tests Signed-off-by: Zhenchi --- .../search/fst_apply/keys_apply.rs | 2 +- .../search/index_apply/predicates_apply.rs | 59 +++++++++---------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/src/index/src/inverted_index/search/fst_apply/keys_apply.rs b/src/index/src/inverted_index/search/fst_apply/keys_apply.rs index a5ed84dce7ce..4ec5710a3435 100644 --- a/src/index/src/inverted_index/search/fst_apply/keys_apply.rs +++ b/src/index/src/inverted_index/search/fst_apply/keys_apply.rs @@ -89,7 +89,7 @@ impl KeysFstApplier { fn get_list(p: &Predicate) -> &HashSet { match p { Predicate::InList(i) => &i.list, - _ => unreachable!(), // `in_lists` is filtered by `split_at_in_lists + _ => unreachable!(), // `in_lists` is filtered by `split_at_in_lists` } } diff --git a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs index 356fb0fa990e..5c39221b85a6 100644 --- a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs +++ b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs @@ -134,20 +134,30 @@ mod tests { metas } - #[tokio::test] - async fn test_index_applier_apply_point_get() { - // A point get fst applier + fn key_fst_applier(value: &'static str) -> Box { let mut mock_fst_applier = MockFstApplier::new(); mock_fst_applier .expect_apply() - .returning(|fst| fst.get("tag0_value0").into_iter().collect()); + .returning(move |fst| fst.get(value).into_iter().collect()); + Box::new(mock_fst_applier) + } + + fn fst_value(offset: u64, size: u64) -> u64 { + (offset << 32) | size + } + + fn s(s: &'static str) -> String { + s.to_owned() + } - // A index applier that applies point get "tag0_value0" to tag "tag0" + #[tokio::test] + async fn test_index_applier_apply_point_get() { + // An index applier that point-gets "tag0_value0" on tag "tag0" let applier = PredicatesIndexApplier { - fst_appliers: vec![("tag0".to_owned(), Box::new(mock_fst_applier))], + fst_appliers: vec![(s("tag0"), key_fst_applier("tag0_value0"))], }; - // index reader with a single tag "tag0" and a single value "tag0_value0" + // An index reader with a single tag "tag0" and a corresponding value "tag0_value0" let mut mock_reader = MockInvertedIndexReader::new(); mock_reader .expect_metadata() @@ -155,7 +165,7 @@ mod tests { mock_reader .expect_fst() .returning(|meta| match meta.name.as_str() { - "tag0" => Ok(FstMap::from_iter([(b"tag0_value0", (2 << 32) | 1)]).unwrap()), + "tag0" => Ok(FstMap::from_iter([(b"tag0_value0", fst_value(2, 1))]).unwrap()), _ => unreachable!(), }); mock_reader.expect_bitmap().returning(|meta, offset, size| { @@ -167,7 +177,7 @@ mod tests { let indices = applier.apply(&mut mock_reader).await.unwrap(); assert_eq!(indices, vec![0, 2, 4, 6]); - // index reader with a single tag "tag0" and without value "tag0_value0" + // An index reader with a single tag "tag0" but without value "tag0_value0" let mut mock_reader = MockInvertedIndexReader::new(); mock_reader .expect_metadata() @@ -175,7 +185,7 @@ mod tests { mock_reader .expect_fst() .returning(|meta| match meta.name.as_str() { - "tag0" => Ok(FstMap::from_iter([(b"tag0_value1", (2 << 32) | 1)]).unwrap()), + "tag0" => Ok(FstMap::from_iter([(b"tag0_value1", fst_value(2, 1))]).unwrap()), _ => unreachable!(), }); let indices = applier.apply(&mut mock_reader).await.unwrap(); @@ -184,26 +194,15 @@ mod tests { #[tokio::test] async fn test_index_applier_apply_intersection_with_two_tags() { - // A point get fst applier for tag "tag0" - let mut mock_fst_applier = MockFstApplier::new(); - mock_fst_applier - .expect_apply() - .returning(|fst| fst.get("tag0_value0").into_iter().collect()); - // Another point get fst applier for tag "tag1" - let mut mock_fst_applier2 = MockFstApplier::new(); - mock_fst_applier2 - .expect_apply() - .returning(|fst| fst.get("tag1_value0").into_iter().collect()); - - // A index applier that intersects point get "tag0_value0" and "tag1_value0" + // An index applier that intersects "tag0_value0" on tag "tag0" and "tag1_value0" on tag "tag1" let applier = PredicatesIndexApplier { fst_appliers: vec![ - ("tag0".to_owned(), Box::new(mock_fst_applier)), - ("tag1".to_owned(), Box::new(mock_fst_applier2)), + (s("tag0"), key_fst_applier("tag0_value0")), + (s("tag1"), key_fst_applier("tag1_value0")), ], }; - // index reader with two tags "tag0" and "tag1" and a single value "tag0_value0" and "tag1_value0" + // An index reader with two tags "tag0" and "tag1" and respective values "tag0_value0" and "tag1_value0" let mut mock_reader = MockInvertedIndexReader::new(); mock_reader .expect_metadata() @@ -211,8 +210,8 @@ mod tests { mock_reader .expect_fst() .returning(|meta| match meta.name.as_str() { - "tag0" => Ok(FstMap::from_iter([(b"tag0_value0", (1 << 32) | 1)]).unwrap()), - "tag1" => Ok(FstMap::from_iter([(b"tag1_value0", (2 << 32) | 1)]).unwrap()), + "tag0" => Ok(FstMap::from_iter([(b"tag0_value0", fst_value(1, 1))]).unwrap()), + "tag1" => Ok(FstMap::from_iter([(b"tag1_value0", fst_value(2, 1))]).unwrap()), _ => unreachable!(), }); mock_reader.expect_bitmap().returning(|meta, offset, size| { @@ -239,7 +238,7 @@ mod tests { .returning(|| Ok(mock_metas(["tag0"]))); let indices = applier.apply(&mut mock_reader).await.unwrap(); - assert_eq!(indices, vec![0, 1, 2, 3, 4, 5, 6, 7]); // scan all + assert_eq!(indices, vec![0, 1, 2, 3, 4, 5, 6, 7]); // full range to scan } #[tokio::test] @@ -257,7 +256,7 @@ mod tests { mock_fst_applier.expect_apply().never(); let applier = PredicatesIndexApplier { - fst_appliers: vec![("tag0".to_owned(), Box::new(mock_fst_applier))], + fst_appliers: vec![(s("tag0"), Box::new(mock_fst_applier))], }; let indices = applier.apply(&mut mock_reader).await.unwrap(); @@ -275,7 +274,7 @@ mod tests { mock_fst_applier.expect_apply().never(); let applier = PredicatesIndexApplier { - fst_appliers: vec![("tag0".to_owned(), Box::new(mock_fst_applier))], + fst_appliers: vec![(s("tag0"), Box::new(mock_fst_applier))], }; let result = applier.apply(&mut mock_reader).await; From 69cf37859e1e2bdc9d3782703f83a9d79acbc297 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Mon, 4 Dec 2023 13:17:54 +0000 Subject: [PATCH 16/29] refactor: bytemuck to extract offset and size Signed-off-by: Zhenchi --- Cargo.lock | 1 + Cargo.toml | 1 + src/index/Cargo.toml | 1 + src/index/src/inverted_index/search/fst_values_mapper.rs | 5 ++--- src/promql/Cargo.toml | 2 +- 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 925a08753219..81a76954e332 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3940,6 +3940,7 @@ name = "index" version = "0.4.4" dependencies = [ "async-trait", + "bytemuck", "common-base", "common-error", "common-macro", diff --git a/Cargo.toml b/Cargo.toml index 74b8965e5069..9df7a9881d2d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -100,6 +100,7 @@ opentelemetry-proto = { git = "https://github.com/waynexia/opentelemetry-rust.gi "metrics", "trace", ] } +bytemuck = "1.12" parquet = "47.0" paste = "1.0" pin-project = "1.0" diff --git a/src/index/Cargo.toml b/src/index/Cargo.toml index e19240ae286e..481ed931ec00 100644 --- a/src/index/Cargo.toml +++ b/src/index/Cargo.toml @@ -17,6 +17,7 @@ prost.workspace = true regex-automata.workspace = true regex.workspace = true snafu.workspace = true +bytemuck.workspace = true [dev-dependencies] tokio-util.workspace = true diff --git a/src/index/src/inverted_index/search/fst_values_mapper.rs b/src/index/src/inverted_index/search/fst_values_mapper.rs index cc46e48e3e08..d4675e652d18 100644 --- a/src/index/src/inverted_index/search/fst_values_mapper.rs +++ b/src/index/src/inverted_index/search/fst_values_mapper.rs @@ -44,8 +44,7 @@ impl<'a> FstValuesMapper<'a> { for value in values { // relative_offset (higher 32 bits), size (lower 32 bits) - let relative_offset = (value >> 32) as u32; - let size = *value as u32; + let [relative_offset, size] = bytemuck::cast::(*value); let bm = self .reader @@ -72,7 +71,7 @@ mod tests { use crate::inverted_index::format::reader::MockInvertedIndexReader; fn value(offset: u32, size: u32) -> u64 { - ((offset as u64) << 32) | (size as u64) + bytemuck::cast::<[u32; 2], u64>([offset, size]) } #[tokio::test] diff --git a/src/promql/Cargo.toml b/src/promql/Cargo.toml index fb4984b6e071..a10973d4ebc1 100644 --- a/src/promql/Cargo.toml +++ b/src/promql/Cargo.toml @@ -7,7 +7,7 @@ license.workspace = true [dependencies] async-recursion = "1.0" async-trait.workspace = true -bytemuck = "1.12" +bytemuck.workspace = true catalog.workspace = true common-catalog.workspace = true common-error.workspace = true From 65e47b776e9baa90af6c3f2c320a5e3bc8259252 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Mon, 4 Dec 2023 13:18:22 +0000 Subject: [PATCH 17/29] fix: toml format Signed-off-by: Zhenchi --- Cargo.toml | 2 +- src/index/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9df7a9881d2d..04774aef4185 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -74,6 +74,7 @@ async-trait = "0.1" base64 = "0.21" bigdecimal = "0.4.2" bitflags = "2.4.1" +bytemuck = "1.12" chrono = { version = "0.4", features = ["serde"] } datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } @@ -100,7 +101,6 @@ opentelemetry-proto = { git = "https://github.com/waynexia/opentelemetry-rust.gi "metrics", "trace", ] } -bytemuck = "1.12" parquet = "47.0" paste = "1.0" pin-project = "1.0" diff --git a/src/index/Cargo.toml b/src/index/Cargo.toml index 481ed931ec00..800420164ac5 100644 --- a/src/index/Cargo.toml +++ b/src/index/Cargo.toml @@ -6,6 +6,7 @@ license.workspace = true [dependencies] async-trait.workspace = true +bytemuck.workspace = true common-base.workspace = true common-error.workspace = true common-macro.workspace = true @@ -17,7 +18,6 @@ prost.workspace = true regex-automata.workspace = true regex.workspace = true snafu.workspace = true -bytemuck.workspace = true [dev-dependencies] tokio-util.workspace = true From 6c8a2ed49c58f47708cfc334abb367c4a902ea39 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Mon, 4 Dec 2023 13:24:24 +0000 Subject: [PATCH 18/29] refactor: use bytemuck Signed-off-by: Zhenchi --- .../src/inverted_index/search/index_apply/predicates_apply.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs index 5c39221b85a6..4741f0c19f9a 100644 --- a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs +++ b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs @@ -142,8 +142,8 @@ mod tests { Box::new(mock_fst_applier) } - fn fst_value(offset: u64, size: u64) -> u64 { - (offset << 32) | size + fn fst_value(offset: u32, size: u32) -> u64 { + bytemuck::cast::<_, u64>([offset, size]) } fn s(s: &'static str) -> String { From 6b59dc932c19a58fe3ffd05a8912d80b8170f1b0 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Mon, 4 Dec 2023 13:32:31 +0000 Subject: [PATCH 19/29] refactor: reorg value in unit tests Signed-off-by: Zhenchi --- .../search/index_apply/predicates_apply.rs | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs index 4741f0c19f9a..194c853f804b 100644 --- a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs +++ b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs @@ -118,7 +118,11 @@ mod tests { use crate::inverted_index::search::fst_apply::MockFstApplier; use crate::inverted_index::FstMap; - fn mock_metas<'a>(tags: impl IntoIterator) -> InvertedIndexMetas { + fn s(s: &'static str) -> String { + s.to_owned() + } + + fn mock_metas(tags: impl IntoIterator) -> InvertedIndexMetas { let mut metas = InvertedIndexMetas { total_row_count: 8, segment_row_count: 1, @@ -126,10 +130,10 @@ mod tests { }; for tag in tags.into_iter() { let meta = InvertedIndexMeta { - name: "".to_owned(), + name: s(tag), ..Default::default() }; - metas.metas.insert(tag.into(), meta); + metas.metas.insert(s(tag), meta); } metas } @@ -146,46 +150,42 @@ mod tests { bytemuck::cast::<_, u64>([offset, size]) } - fn s(s: &'static str) -> String { - s.to_owned() - } - #[tokio::test] async fn test_index_applier_apply_point_get() { - // An index applier that point-gets "tag0_value0" on tag "tag0" + // An index applier that point-gets "tag-0_value-0" on tag "tag-0" let applier = PredicatesIndexApplier { - fst_appliers: vec![(s("tag0"), key_fst_applier("tag0_value0"))], + fst_appliers: vec![(s("tag-0"), key_fst_applier("tag-0_value-0"))], }; - // An index reader with a single tag "tag0" and a corresponding value "tag0_value0" + // An index reader with a single tag "tag-0" and a corresponding value "tag-0_value-0" let mut mock_reader = MockInvertedIndexReader::new(); mock_reader .expect_metadata() - .returning(|| Ok(mock_metas(["tag0"]))); + .returning(|| Ok(mock_metas(["tag-0"]))); mock_reader .expect_fst() .returning(|meta| match meta.name.as_str() { - "tag0" => Ok(FstMap::from_iter([(b"tag0_value0", fst_value(2, 1))]).unwrap()), + "tag-0" => Ok(FstMap::from_iter([(b"tag-0_value-0", fst_value(2, 1))]).unwrap()), _ => unreachable!(), }); mock_reader.expect_bitmap().returning(|meta, offset, size| { match (meta.name.as_str(), offset, size) { - ("tag0", 2, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1, 0]), + ("tag-0", 2, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1, 0]), _ => unreachable!(), } }); let indices = applier.apply(&mut mock_reader).await.unwrap(); assert_eq!(indices, vec![0, 2, 4, 6]); - // An index reader with a single tag "tag0" but without value "tag0_value0" + // An index reader with a single tag "tag-0" but without value "tag-0_value-0" let mut mock_reader = MockInvertedIndexReader::new(); mock_reader .expect_metadata() - .returning(|| Ok(mock_metas(["tag0"]))); + .returning(|| Ok(mock_metas(["tag-0"]))); mock_reader .expect_fst() .returning(|meta| match meta.name.as_str() { - "tag0" => Ok(FstMap::from_iter([(b"tag0_value1", fst_value(2, 1))]).unwrap()), + "tag-0" => Ok(FstMap::from_iter([(b"tag-0_value-1", fst_value(2, 1))]).unwrap()), _ => unreachable!(), }); let indices = applier.apply(&mut mock_reader).await.unwrap(); @@ -194,30 +194,30 @@ mod tests { #[tokio::test] async fn test_index_applier_apply_intersection_with_two_tags() { - // An index applier that intersects "tag0_value0" on tag "tag0" and "tag1_value0" on tag "tag1" + // An index applier that intersects "tag-0_value-0" on tag "tag-0" and "tag-1_value-a" on tag "tag-1" let applier = PredicatesIndexApplier { fst_appliers: vec![ - (s("tag0"), key_fst_applier("tag0_value0")), - (s("tag1"), key_fst_applier("tag1_value0")), + (s("tag-0"), key_fst_applier("tag-0_value-0")), + (s("tag-1"), key_fst_applier("tag-1_value-a")), ], }; - // An index reader with two tags "tag0" and "tag1" and respective values "tag0_value0" and "tag1_value0" + // An index reader with two tags "tag-0" and "tag-1" and respective values "tag-0_value-0" and "tag-1_value-a" let mut mock_reader = MockInvertedIndexReader::new(); mock_reader .expect_metadata() - .returning(|| Ok(mock_metas(["tag0", "tag1"]))); + .returning(|| Ok(mock_metas(["tag-0", "tag-1"]))); mock_reader .expect_fst() .returning(|meta| match meta.name.as_str() { - "tag0" => Ok(FstMap::from_iter([(b"tag0_value0", fst_value(1, 1))]).unwrap()), - "tag1" => Ok(FstMap::from_iter([(b"tag1_value0", fst_value(2, 1))]).unwrap()), + "tag-0" => Ok(FstMap::from_iter([(b"tag-0_value-0", fst_value(1, 1))]).unwrap()), + "tag-1" => Ok(FstMap::from_iter([(b"tag-1_value-a", fst_value(2, 1))]).unwrap()), _ => unreachable!(), }); mock_reader.expect_bitmap().returning(|meta, offset, size| { match (meta.name.as_str(), offset, size) { - ("tag0", 1, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1, 0]), - ("tag1", 2, 1) => Ok(bitvec![u8, Lsb0; 1, 1, 0, 1, 1, 0, 1, 1]), + ("tag-0", 1, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1, 0]), + ("tag-1", 2, 1) => Ok(bitvec![u8, Lsb0; 1, 1, 0, 1, 1, 0, 1, 1]), _ => unreachable!(), } }); @@ -235,7 +235,7 @@ mod tests { let mut mock_reader: MockInvertedIndexReader = MockInvertedIndexReader::new(); mock_reader .expect_metadata() - .returning(|| Ok(mock_metas(["tag0"]))); + .returning(|| Ok(mock_metas(["tag-0"]))); let indices = applier.apply(&mut mock_reader).await.unwrap(); assert_eq!(indices, vec![0, 1, 2, 3, 4, 5, 6, 7]); // full range to scan @@ -256,7 +256,7 @@ mod tests { mock_fst_applier.expect_apply().never(); let applier = PredicatesIndexApplier { - fst_appliers: vec![(s("tag0"), Box::new(mock_fst_applier))], + fst_appliers: vec![(s("tag-0"), Box::new(mock_fst_applier))], }; let indices = applier.apply(&mut mock_reader).await.unwrap(); @@ -274,7 +274,7 @@ mod tests { mock_fst_applier.expect_apply().never(); let applier = PredicatesIndexApplier { - fst_appliers: vec![(s("tag0"), Box::new(mock_fst_applier))], + fst_appliers: vec![(s("tag-0"), Box::new(mock_fst_applier))], }; let result = applier.apply(&mut mock_reader).await; From 7488fa2780732e992980486f358620a20e008e28 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Mon, 4 Dec 2023 13:43:21 +0000 Subject: [PATCH 20/29] chore: update proto Signed-off-by: Zhenchi --- Cargo.lock | 2 +- src/common/meta/src/rpc/ddl.rs | 1 + src/meta-srv/src/handler/region_lease_handler.rs | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index be32226e47a8..edf926bbf123 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3570,7 +3570,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "greptime-proto" version = "0.1.0" -source = "git+https://github.com/zhongzc/greptime-proto.git?branch=zhongzc/index-add-row-count#6ff29913b75179b0f212eb2bf53cece6229f1aaa" +source = "git+https://github.com/zhongzc/greptime-proto.git?branch=zhongzc/index-add-row-count#a4f2b699b7b3b704c002d73833f66b3abe606c5f" dependencies = [ "prost 0.12.2", "serde", diff --git a/src/common/meta/src/rpc/ddl.rs b/src/common/meta/src/rpc/ddl.rs index 0dfa5f507156..d832a106c25c 100644 --- a/src/common/meta/src/rpc/ddl.rs +++ b/src/common/meta/src/rpc/ddl.rs @@ -118,6 +118,7 @@ impl TryFrom for PbSubmitDdlTaskRequest { schema_name: task.schema, table_name: task.table, table_id: Some(api::v1::TableId { id: task.table_id }), + drop_if_exists: false, }), }), DdlTask::AlterTable(task) => Task::AlterTableTask(PbAlterTableTask { diff --git a/src/meta-srv/src/handler/region_lease_handler.rs b/src/meta-srv/src/handler/region_lease_handler.rs index 18a29fad0378..1102bee0e6ab 100644 --- a/src/meta-srv/src/handler/region_lease_handler.rs +++ b/src/meta-srv/src/handler/region_lease_handler.rs @@ -167,6 +167,7 @@ impl HeartbeatHandler for RegionLeaseHandler { .collect::>(), duration_since_epoch: req.duration_since_epoch, lease_seconds: self.region_lease_seconds, + closeable_region_ids: vec![], }); Ok(HandleControl::Continue) From 6a89045947bfe78a988fd91537172ed6305bc30f Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Tue, 5 Dec 2023 01:59:37 +0000 Subject: [PATCH 21/29] chore: add a TODO reminder to consider optimizing the order of apply Signed-off-by: Zhenchi --- .../src/inverted_index/search/index_apply/predicates_apply.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs index 194c853f804b..8c8611c0c791 100644 --- a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs +++ b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs @@ -41,6 +41,7 @@ impl IndexApplier for PredicatesIndexApplier { let metadata = reader.metadata().await?; let mut bitmap = Self::bitmap_full_range(&metadata); + // TODO(zhongzc): optimize the order of applying for (tag_name, fst_applier) in &self.fst_appliers { if bitmap.count_ones() == 0 { break; From ba482188cecc5c91fb8476315e9724f40f577e66 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Tue, 5 Dec 2023 02:23:57 +0000 Subject: [PATCH 22/29] refactor: InList predicates are applied first to benefit from higher selectivity Signed-off-by: Zhenchi --- .../search/index_apply/predicates_apply.rs | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs index 8c8611c0c791..79c7db573470 100644 --- a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs +++ b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs @@ -71,21 +71,25 @@ impl IndexApplier for PredicatesIndexApplier { impl PredicatesIndexApplier { /// Constructs an instance of `PredicatesIndexApplier` based on a list of tag predicates. /// Chooses an appropriate `FstApplier` for each tag based on the nature of its predicates. - pub fn try_from(predicates: Vec<(String, Vec)>) -> Result { + pub fn try_from(mut predicates: Vec<(String, Vec)>) -> Result { let mut fst_appliers = Vec::with_capacity(predicates.len()); - for (tag_name, predicates) in predicates { + // InList predicates are applied first to benefit from higher selectivity. + let in_list_index = predicates + .iter_mut() + .partition_in_place(|(_, ps)| ps.iter().any(|p| matches!(p, Predicate::InList(_)))); + let mut iter = predicates.into_iter(); + for _ in 0..in_list_index { + let (tag_name, predicates) = iter.next().unwrap(); + let fst_applier = Box::new(KeysFstApplier::try_from(predicates)?) as _; + fst_appliers.push((tag_name, fst_applier)); + } + + for (tag_name, predicates) in iter { if predicates.is_empty() { continue; } - - let exists_in_list = predicates.iter().any(|p| matches!(p, Predicate::InList(_))); - let fst_applier = if exists_in_list { - Box::new(KeysFstApplier::try_from(predicates)?) as _ - } else { - Box::new(IntersectionFstApplier::try_from(predicates)?) as _ - }; - + let fst_applier = Box::new(IntersectionFstApplier::try_from(predicates)?) as _; fst_appliers.push((tag_name, fst_applier)); } From 3c5b58c2716382711efb020e23a7e3f281a1036d Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Tue, 5 Dec 2023 02:55:29 +0000 Subject: [PATCH 23/29] chore: update proto Signed-off-by: Zhenchi --- Cargo.lock | 2 +- Cargo.toml | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index edf926bbf123..8594ed748454 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3570,7 +3570,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "greptime-proto" version = "0.1.0" -source = "git+https://github.com/zhongzc/greptime-proto.git?branch=zhongzc/index-add-row-count#a4f2b699b7b3b704c002d73833f66b3abe606c5f" +source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=b1d403088f02136bcebde53d604f491c260ca8e2#b1d403088f02136bcebde53d604f491c260ca8e2" dependencies = [ "prost 0.12.2", "serde", diff --git a/Cargo.toml b/Cargo.toml index 4bbc7257c569..a5ec4a69ff10 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -88,7 +88,7 @@ etcd-client = "0.12" fst = "0.4.7" futures = "0.3" futures-util = "0.3" -greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "2b3ae45740a49ec6a0830d71fc09c3093aeb5fe7" } +greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "b1d403088f02136bcebde53d604f491c260ca8e2" } humantime-serde = "1.1" itertools = "0.10" lazy_static = "1.4" @@ -197,6 +197,3 @@ strip = true lto = "thin" debug = false incremental = false - -[patch.'https://github.com/GreptimeTeam/greptime-proto.git'] -greptime-proto = { git = "https://github.com/zhongzc/greptime-proto.git", branch = "zhongzc/index-add-row-count" } From d87becbe609b7362387835c22ed04f616f0b971a Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Tue, 5 Dec 2023 04:54:35 +0000 Subject: [PATCH 24/29] feat: add read options to control the behavior of index not found Signed-off-by: Zhenchi --- src/index/src/inverted_index/error.rs | 9 +- .../src/inverted_index/search/index_apply.rs | 29 ++++- .../search/index_apply/predicates_apply.rs | 100 ++++++++++++++---- 3 files changed, 109 insertions(+), 29 deletions(-) diff --git a/src/index/src/inverted_index/error.rs b/src/index/src/inverted_index/error.rs index 24eaed0774e9..065e2d29991e 100644 --- a/src/index/src/inverted_index/error.rs +++ b/src/index/src/inverted_index/error.rs @@ -113,11 +113,8 @@ pub enum Error { predicates: Vec, }, - #[snafu(display("Tag not found in index, tag_name: {tag_name}"))] - TagNotFoundInIndex { - tag_name: String, - location: Location, - }, + #[snafu(display("index not found, name: {name}"))] + IndexNotFound { name: String, location: Location }, } impl ErrorExt for Error { @@ -139,7 +136,7 @@ impl ErrorExt for Error { | KeysApplierWithoutInList { .. } | IntersectionApplierWithInList { .. } | EmptyPredicates { .. } - | TagNotFoundInIndex { .. } => StatusCode::InvalidArguments, + | IndexNotFound { .. } => StatusCode::InvalidArguments, } } diff --git a/src/index/src/inverted_index/search/index_apply.rs b/src/index/src/inverted_index/search/index_apply.rs index 24ce2b0e92bc..eb66edb50670 100644 --- a/src/index/src/inverted_index/search/index_apply.rs +++ b/src/index/src/inverted_index/search/index_apply.rs @@ -28,5 +28,32 @@ use crate::inverted_index::format::reader::InvertedIndexReader; pub trait IndexApplier { /// Applies the predefined predicates to the data read by the given index reader, returning /// a list of relevant indices (e.g., post IDs, group IDs, row IDs). - async fn apply(&self, reader: &mut dyn InvertedIndexReader) -> Result>; + /// + /// The `options` parameter controls the behavior of the applier. + async fn apply( + &self, + reader: &mut dyn InvertedIndexReader, + options: ReadOptions, + ) -> Result>; +} + +/// Options for reading an inverted index. +#[derive(Clone, Copy, Debug, Eq, PartialEq, Default)] +pub struct ReadOptions { + /// `index_not_found_strategy` controls the behavior of the applier when the index is not found. + pub index_not_found_strategy: IndexNotFoundStrategy, +} + +/// Defines the behavior of an applier when the index is not found. +#[derive(Clone, Copy, Debug, Eq, PartialEq, Default)] +pub enum IndexNotFoundStrategy { + /// If the index is not found, the applier will return an empty list of indices. + #[default] + ReturnEmpty, + + /// If the index is not found, the applier will return a list of all indices. + ReturnFullRange, + + /// If the index is not found, the applier will return an error. + ReturnError, } diff --git a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs index 79c7db573470..2509abe37d48 100644 --- a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs +++ b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs @@ -15,15 +15,16 @@ use async_trait::async_trait; use common_base::BitVec; use greptime_proto::v1::index::InvertedIndexMetas; -use snafu::OptionExt; -use crate::inverted_index::error::{Result, TagNotFoundInIndexSnafu}; +use crate::inverted_index::error::{IndexNotFoundSnafu, Result}; use crate::inverted_index::format::reader::InvertedIndexReader; use crate::inverted_index::search::fst_apply::{ FstApplier, IntersectionFstApplier, KeysFstApplier, }; use crate::inverted_index::search::fst_values_mapper::FstValuesMapper; -use crate::inverted_index::search::index_apply::IndexApplier; +use crate::inverted_index::search::index_apply::{ + IndexApplier, IndexNotFoundStrategy, ReadOptions, +}; use crate::inverted_index::search::predicate::Predicate; /// `PredicatesIndexApplier` contains a collection of `FstApplier`s, each associated with a tag field, @@ -37,23 +38,34 @@ pub struct PredicatesIndexApplier { impl IndexApplier for PredicatesIndexApplier { /// Applies all `FstApplier`s to the data in the index, intersecting the individual /// bitmaps obtained for each tag to result in a final set of indices. - async fn apply(&self, reader: &mut dyn InvertedIndexReader) -> Result> { + async fn apply( + &self, + reader: &mut dyn InvertedIndexReader, + options: ReadOptions, + ) -> Result> { let metadata = reader.metadata().await?; let mut bitmap = Self::bitmap_full_range(&metadata); - // TODO(zhongzc): optimize the order of applying - for (tag_name, fst_applier) in &self.fst_appliers { + // TODO(zhongzc): optimize the order of applying to make it quicker to return empty. + for (name, fst_applier) in &self.fst_appliers { if bitmap.count_ones() == 0 { break; } - // TODO(zhongzc): how to handle missing columns? - let meta = metadata - .metas - .get(tag_name) - .with_context(|| TagNotFoundInIndexSnafu { - tag_name: tag_name.to_owned(), - })?; + let meta = match metadata.metas.get(name) { + Some(meta) => meta, + None => match options.index_not_found_strategy { + IndexNotFoundStrategy::ReturnEmpty => { + return Ok(vec![]); + } + IndexNotFoundStrategy::ReturnFullRange => { + continue; + } + IndexNotFoundStrategy::ReturnError => { + return IndexNotFoundSnafu { name }.fail(); + } + }, + }; let fst = reader.fst(meta).await?; let values = fst_applier.apply(&fst); @@ -156,7 +168,7 @@ mod tests { } #[tokio::test] - async fn test_index_applier_apply_point_get() { + async fn test_index_applier_apply_get_key() { // An index applier that point-gets "tag-0_value-0" on tag "tag-0" let applier = PredicatesIndexApplier { fst_appliers: vec![(s("tag-0"), key_fst_applier("tag-0_value-0"))], @@ -179,7 +191,10 @@ mod tests { _ => unreachable!(), } }); - let indices = applier.apply(&mut mock_reader).await.unwrap(); + let indices = applier + .apply(&mut mock_reader, ReadOptions::default()) + .await + .unwrap(); assert_eq!(indices, vec![0, 2, 4, 6]); // An index reader with a single tag "tag-0" but without value "tag-0_value-0" @@ -193,7 +208,10 @@ mod tests { "tag-0" => Ok(FstMap::from_iter([(b"tag-0_value-1", fst_value(2, 1))]).unwrap()), _ => unreachable!(), }); - let indices = applier.apply(&mut mock_reader).await.unwrap(); + let indices = applier + .apply(&mut mock_reader, ReadOptions::default()) + .await + .unwrap(); assert!(indices.is_empty()); } @@ -227,7 +245,10 @@ mod tests { } }); - let indices = applier.apply(&mut mock_reader).await.unwrap(); + let indices = applier + .apply(&mut mock_reader, ReadOptions::default()) + .await + .unwrap(); assert_eq!(indices, vec![0, 4, 6]); } @@ -242,7 +263,10 @@ mod tests { .expect_metadata() .returning(|| Ok(mock_metas(["tag-0"]))); - let indices = applier.apply(&mut mock_reader).await.unwrap(); + let indices = applier + .apply(&mut mock_reader, ReadOptions::default()) + .await + .unwrap(); assert_eq!(indices, vec![0, 1, 2, 3, 4, 5, 6, 7]); // full range to scan } @@ -264,12 +288,15 @@ mod tests { fst_appliers: vec![(s("tag-0"), Box::new(mock_fst_applier))], }; - let indices = applier.apply(&mut mock_reader).await.unwrap(); + let indices = applier + .apply(&mut mock_reader, ReadOptions::default()) + .await + .unwrap(); assert!(indices.is_empty()); } #[tokio::test] - async fn test_index_applier_with_nonexistent_tag() { + async fn test_index_applier_with_nonexistent_index() { let mut mock_reader = MockInvertedIndexReader::new(); mock_reader .expect_metadata() @@ -282,7 +309,36 @@ mod tests { fst_appliers: vec![(s("tag-0"), Box::new(mock_fst_applier))], }; - let result = applier.apply(&mut mock_reader).await; - assert!(matches!(result, Err(Error::TagNotFoundInIndex { .. }))); + let result = applier + .apply( + &mut mock_reader, + ReadOptions { + index_not_found_strategy: IndexNotFoundStrategy::ReturnError, + }, + ) + .await; + assert!(matches!(result, Err(Error::IndexNotFound { .. }))); + + let indices = applier + .apply( + &mut mock_reader, + ReadOptions { + index_not_found_strategy: IndexNotFoundStrategy::ReturnEmpty, + }, + ) + .await + .unwrap(); + assert!(indices.is_empty()); + + let indices = applier + .apply( + &mut mock_reader, + ReadOptions { + index_not_found_strategy: IndexNotFoundStrategy::ReturnFullRange, + }, + ) + .await + .unwrap(); + assert_eq!(indices, vec![0, 1, 2, 3, 4, 5, 6, 7]); } } From ec319b0a1eaf0f70dc1fa236cede195e20811ab8 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Tue, 5 Dec 2023 04:57:58 +0000 Subject: [PATCH 25/29] refactor: polish Signed-off-by: Zhenchi --- .../inverted_index/search/index_apply/predicates_apply.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs index 2509abe37d48..b02ce9180281 100644 --- a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs +++ b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs @@ -52,9 +52,8 @@ impl IndexApplier for PredicatesIndexApplier { break; } - let meta = match metadata.metas.get(name) { - Some(meta) => meta, - None => match options.index_not_found_strategy { + let Some(meta) = metadata.metas.get(name) else { + match options.index_not_found_strategy { IndexNotFoundStrategy::ReturnEmpty => { return Ok(vec![]); } @@ -64,7 +63,7 @@ impl IndexApplier for PredicatesIndexApplier { IndexNotFoundStrategy::ReturnError => { return IndexNotFoundSnafu { name }.fail(); } - }, + } }; let fst = reader.fst(meta).await?; From 91a1338c766d9a0af5eafad2fd71c749b6f5c5ad Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Tue, 5 Dec 2023 05:16:08 +0000 Subject: [PATCH 26/29] refactor: move read options to implementation instead of trait Signed-off-by: Zhenchi --- .../src/inverted_index/search/index_apply.rs | 29 +--- .../search/index_apply/predicates_apply.rs | 132 +++++++++--------- 2 files changed, 66 insertions(+), 95 deletions(-) diff --git a/src/index/src/inverted_index/search/index_apply.rs b/src/index/src/inverted_index/search/index_apply.rs index eb66edb50670..24ce2b0e92bc 100644 --- a/src/index/src/inverted_index/search/index_apply.rs +++ b/src/index/src/inverted_index/search/index_apply.rs @@ -28,32 +28,5 @@ use crate::inverted_index::format::reader::InvertedIndexReader; pub trait IndexApplier { /// Applies the predefined predicates to the data read by the given index reader, returning /// a list of relevant indices (e.g., post IDs, group IDs, row IDs). - /// - /// The `options` parameter controls the behavior of the applier. - async fn apply( - &self, - reader: &mut dyn InvertedIndexReader, - options: ReadOptions, - ) -> Result>; -} - -/// Options for reading an inverted index. -#[derive(Clone, Copy, Debug, Eq, PartialEq, Default)] -pub struct ReadOptions { - /// `index_not_found_strategy` controls the behavior of the applier when the index is not found. - pub index_not_found_strategy: IndexNotFoundStrategy, -} - -/// Defines the behavior of an applier when the index is not found. -#[derive(Clone, Copy, Debug, Eq, PartialEq, Default)] -pub enum IndexNotFoundStrategy { - /// If the index is not found, the applier will return an empty list of indices. - #[default] - ReturnEmpty, - - /// If the index is not found, the applier will return a list of all indices. - ReturnFullRange, - - /// If the index is not found, the applier will return an error. - ReturnError, + async fn apply(&self, reader: &mut dyn InvertedIndexReader) -> Result>; } diff --git a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs index b02ce9180281..4676faac7ec9 100644 --- a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs +++ b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs @@ -22,9 +22,7 @@ use crate::inverted_index::search::fst_apply::{ FstApplier, IntersectionFstApplier, KeysFstApplier, }; use crate::inverted_index::search::fst_values_mapper::FstValuesMapper; -use crate::inverted_index::search::index_apply::{ - IndexApplier, IndexNotFoundStrategy, ReadOptions, -}; +use crate::inverted_index::search::index_apply::IndexApplier; use crate::inverted_index::search::predicate::Predicate; /// `PredicatesIndexApplier` contains a collection of `FstApplier`s, each associated with a tag field, @@ -32,17 +30,16 @@ use crate::inverted_index::search::predicate::Predicate; pub struct PredicatesIndexApplier { /// A list of `FstApplier`s, each associated with a tag field. fst_appliers: Vec<(String, Box)>, + + /// Options for reading an inverted index (e.g., how to handle missing index). + options: ReadOptions, } #[async_trait] impl IndexApplier for PredicatesIndexApplier { /// Applies all `FstApplier`s to the data in the index, intersecting the individual /// bitmaps obtained for each tag to result in a final set of indices. - async fn apply( - &self, - reader: &mut dyn InvertedIndexReader, - options: ReadOptions, - ) -> Result> { + async fn apply(&self, reader: &mut dyn InvertedIndexReader) -> Result> { let metadata = reader.metadata().await?; let mut bitmap = Self::bitmap_full_range(&metadata); @@ -53,14 +50,14 @@ impl IndexApplier for PredicatesIndexApplier { } let Some(meta) = metadata.metas.get(name) else { - match options.index_not_found_strategy { + match self.options.index_not_found_strategy { IndexNotFoundStrategy::ReturnEmpty => { return Ok(vec![]); } IndexNotFoundStrategy::ReturnFullRange => { continue; } - IndexNotFoundStrategy::ReturnError => { + IndexNotFoundStrategy::ThrowError => { return IndexNotFoundSnafu { name }.fail(); } } @@ -82,7 +79,13 @@ impl IndexApplier for PredicatesIndexApplier { impl PredicatesIndexApplier { /// Constructs an instance of `PredicatesIndexApplier` based on a list of tag predicates. /// Chooses an appropriate `FstApplier` for each tag based on the nature of its predicates. - pub fn try_from(mut predicates: Vec<(String, Vec)>) -> Result { + /// + /// * `predicates`: A list of tag predicates. + /// * `options`: Options for reading an inverted index (e.g., how to handle missing index). + pub fn try_new( + mut predicates: Vec<(String, Vec)>, + options: ReadOptions, + ) -> Result { let mut fst_appliers = Vec::with_capacity(predicates.len()); // InList predicates are applied first to benefit from higher selectivity. @@ -104,7 +107,10 @@ impl PredicatesIndexApplier { fst_appliers.push((tag_name, fst_applier)); } - Ok(PredicatesIndexApplier { fst_appliers }) + Ok(PredicatesIndexApplier { + fst_appliers, + options, + }) } /// Creates a `BitVec` representing the full range of data in the index for initial scanning. @@ -116,11 +122,25 @@ impl PredicatesIndexApplier { } } -impl TryFrom)>> for PredicatesIndexApplier { - type Error = crate::inverted_index::error::Error; - fn try_from(predicates: Vec<(String, Vec)>) -> Result { - Self::try_from(predicates) - } +/// Options for reading an inverted index. +#[derive(Clone, Copy, Debug, Eq, PartialEq, Default)] +pub struct ReadOptions { + /// `index_not_found_strategy` controls the behavior of the applier when the index is not found. + pub index_not_found_strategy: IndexNotFoundStrategy, +} + +/// Defines the behavior of an applier when the index is not found. +#[derive(Clone, Copy, Debug, Eq, PartialEq, Default)] +pub enum IndexNotFoundStrategy { + /// If the index is not found, the applier will return an empty list of indices. + #[default] + ReturnEmpty, + + /// If the index is not found, the applier will return a list of all indices. + ReturnFullRange, + + /// If the index is not found, the applier will throw an error. + ThrowError, } #[cfg(test)] @@ -171,6 +191,7 @@ mod tests { // An index applier that point-gets "tag-0_value-0" on tag "tag-0" let applier = PredicatesIndexApplier { fst_appliers: vec![(s("tag-0"), key_fst_applier("tag-0_value-0"))], + options: ReadOptions::default(), }; // An index reader with a single tag "tag-0" and a corresponding value "tag-0_value-0" @@ -190,10 +211,7 @@ mod tests { _ => unreachable!(), } }); - let indices = applier - .apply(&mut mock_reader, ReadOptions::default()) - .await - .unwrap(); + let indices = applier.apply(&mut mock_reader).await.unwrap(); assert_eq!(indices, vec![0, 2, 4, 6]); // An index reader with a single tag "tag-0" but without value "tag-0_value-0" @@ -207,10 +225,7 @@ mod tests { "tag-0" => Ok(FstMap::from_iter([(b"tag-0_value-1", fst_value(2, 1))]).unwrap()), _ => unreachable!(), }); - let indices = applier - .apply(&mut mock_reader, ReadOptions::default()) - .await - .unwrap(); + let indices = applier.apply(&mut mock_reader).await.unwrap(); assert!(indices.is_empty()); } @@ -222,6 +237,7 @@ mod tests { (s("tag-0"), key_fst_applier("tag-0_value-0")), (s("tag-1"), key_fst_applier("tag-1_value-a")), ], + options: ReadOptions::default(), }; // An index reader with two tags "tag-0" and "tag-1" and respective values "tag-0_value-0" and "tag-1_value-a" @@ -244,10 +260,7 @@ mod tests { } }); - let indices = applier - .apply(&mut mock_reader, ReadOptions::default()) - .await - .unwrap(); + let indices = applier.apply(&mut mock_reader).await.unwrap(); assert_eq!(indices, vec![0, 4, 6]); } @@ -255,6 +268,7 @@ mod tests { async fn test_index_applier_without_predicates() { let applier = PredicatesIndexApplier { fst_appliers: vec![], + options: ReadOptions::default(), }; let mut mock_reader: MockInvertedIndexReader = MockInvertedIndexReader::new(); @@ -262,10 +276,7 @@ mod tests { .expect_metadata() .returning(|| Ok(mock_metas(["tag-0"]))); - let indices = applier - .apply(&mut mock_reader, ReadOptions::default()) - .await - .unwrap(); + let indices = applier.apply(&mut mock_reader).await.unwrap(); assert_eq!(indices, vec![0, 1, 2, 3, 4, 5, 6, 7]); // full range to scan } @@ -285,12 +296,10 @@ mod tests { let applier = PredicatesIndexApplier { fst_appliers: vec![(s("tag-0"), Box::new(mock_fst_applier))], + options: ReadOptions::default(), }; - let indices = applier - .apply(&mut mock_reader, ReadOptions::default()) - .await - .unwrap(); + let indices = applier.apply(&mut mock_reader).await.unwrap(); assert!(indices.is_empty()); } @@ -301,43 +310,32 @@ mod tests { .expect_metadata() .returning(|| Ok(mock_metas(vec![]))); - let mut mock_fst_applier = MockFstApplier::new(); - mock_fst_applier.expect_apply().never(); - let applier = PredicatesIndexApplier { - fst_appliers: vec![(s("tag-0"), Box::new(mock_fst_applier))], + fst_appliers: vec![(s("tag-0"), key_fst_applier("tag-0_value-0"))], + options: ReadOptions { + index_not_found_strategy: IndexNotFoundStrategy::ThrowError, + }, }; - let result = applier - .apply( - &mut mock_reader, - ReadOptions { - index_not_found_strategy: IndexNotFoundStrategy::ReturnError, - }, - ) - .await; + let result = applier.apply(&mut mock_reader).await; assert!(matches!(result, Err(Error::IndexNotFound { .. }))); - let indices = applier - .apply( - &mut mock_reader, - ReadOptions { - index_not_found_strategy: IndexNotFoundStrategy::ReturnEmpty, - }, - ) - .await - .unwrap(); + let applier = PredicatesIndexApplier { + fst_appliers: vec![(s("tag-0"), key_fst_applier("tag-0_value-0"))], + options: ReadOptions { + index_not_found_strategy: IndexNotFoundStrategy::ReturnEmpty, + }, + }; + let indices = applier.apply(&mut mock_reader).await.unwrap(); assert!(indices.is_empty()); - let indices = applier - .apply( - &mut mock_reader, - ReadOptions { - index_not_found_strategy: IndexNotFoundStrategy::ReturnFullRange, - }, - ) - .await - .unwrap(); + let applier = PredicatesIndexApplier { + fst_appliers: vec![(s("tag-0"), key_fst_applier("tag-0_value-0"))], + options: ReadOptions { + index_not_found_strategy: IndexNotFoundStrategy::ReturnFullRange, + }, + }; + let indices = applier.apply(&mut mock_reader).await.unwrap(); assert_eq!(indices, vec![0, 1, 2, 3, 4, 5, 6, 7]); } } From 3f444a462c6e6f48ea9435bc4d4f9f57368dee23 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Tue, 5 Dec 2023 06:35:17 +0000 Subject: [PATCH 27/29] feat: add SearchContext, refine doc comments Signed-off-by: Zhenchi --- .../src/inverted_index/search/index_apply.rs | 10 ++- .../search/index_apply/predicates_apply.rs | 71 +++++++++++++------ 2 files changed, 60 insertions(+), 21 deletions(-) diff --git a/src/index/src/inverted_index/search/index_apply.rs b/src/index/src/inverted_index/search/index_apply.rs index 24ce2b0e92bc..a4e920308287 100644 --- a/src/index/src/inverted_index/search/index_apply.rs +++ b/src/index/src/inverted_index/search/index_apply.rs @@ -20,6 +20,10 @@ pub use predicates_apply::PredicatesIndexApplier; use crate::inverted_index::error::Result; use crate::inverted_index::format::reader::InvertedIndexReader; +/// A context for searching the inverted index. +#[derive(Clone, Debug, Default)] +pub struct SearchContext {} + /// A trait for processing and transforming indices obtained from an inverted index. /// /// Applier instances are reusable and work with various `InvertedIndexReader` instances, @@ -28,5 +32,9 @@ use crate::inverted_index::format::reader::InvertedIndexReader; pub trait IndexApplier { /// Applies the predefined predicates to the data read by the given index reader, returning /// a list of relevant indices (e.g., post IDs, group IDs, row IDs). - async fn apply(&self, reader: &mut dyn InvertedIndexReader) -> Result>; + async fn apply( + &self, + context: SearchContext, + reader: &mut dyn InvertedIndexReader, + ) -> Result>; } diff --git a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs index 4676faac7ec9..b95fab5e018c 100644 --- a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs +++ b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs @@ -16,6 +16,7 @@ use async_trait::async_trait; use common_base::BitVec; use greptime_proto::v1::index::InvertedIndexMetas; +use super::SearchContext; use crate::inverted_index::error::{IndexNotFoundSnafu, Result}; use crate::inverted_index::format::reader::InvertedIndexReader; use crate::inverted_index::search::fst_apply::{ @@ -25,11 +26,14 @@ use crate::inverted_index::search::fst_values_mapper::FstValuesMapper; use crate::inverted_index::search::index_apply::IndexApplier; use crate::inverted_index::search::predicate::Predicate; -/// `PredicatesIndexApplier` contains a collection of `FstApplier`s, each associated with a tag field, +type IndexName = String; + +/// `PredicatesIndexApplier` contains a collection of `FstApplier`s, each associated with an index name, /// to process and filter index data based on compiled predicates. pub struct PredicatesIndexApplier { - /// A list of `FstApplier`s, each associated with a tag field. - fst_appliers: Vec<(String, Box)>, + /// A list of `FstApplier`s, each associated with a specific index name + /// (e.g. a tag field uses its column name as index name) + fst_appliers: Vec<(IndexName, Box)>, /// Options for reading an inverted index (e.g., how to handle missing index). options: ReadOptions, @@ -37,9 +41,13 @@ pub struct PredicatesIndexApplier { #[async_trait] impl IndexApplier for PredicatesIndexApplier { - /// Applies all `FstApplier`s to the data in the index, intersecting the individual - /// bitmaps obtained for each tag to result in a final set of indices. - async fn apply(&self, reader: &mut dyn InvertedIndexReader) -> Result> { + /// Applies all `FstApplier`s to the data in the inverted index reader, intersecting the individual + /// bitmaps obtained for each index to result in a final set of indices. + async fn apply( + &self, + _context: SearchContext, + reader: &mut dyn InvertedIndexReader, + ) -> Result> { let metadata = reader.metadata().await?; let mut bitmap = Self::bitmap_full_range(&metadata); @@ -78,12 +86,12 @@ impl IndexApplier for PredicatesIndexApplier { impl PredicatesIndexApplier { /// Constructs an instance of `PredicatesIndexApplier` based on a list of tag predicates. - /// Chooses an appropriate `FstApplier` for each tag based on the nature of its predicates. + /// Chooses an appropriate `FstApplier` for each index name based on the nature of its predicates. /// - /// * `predicates`: A list of tag predicates. + /// * `predicates`: A list of predicates for each index name. /// * `options`: Options for reading an inverted index (e.g., how to handle missing index). pub fn try_new( - mut predicates: Vec<(String, Vec)>, + mut predicates: Vec<(IndexName, Vec)>, options: ReadOptions, ) -> Result { let mut fst_appliers = Vec::with_capacity(predicates.len()); @@ -132,14 +140,14 @@ pub struct ReadOptions { /// Defines the behavior of an applier when the index is not found. #[derive(Clone, Copy, Debug, Eq, PartialEq, Default)] pub enum IndexNotFoundStrategy { - /// If the index is not found, the applier will return an empty list of indices. + /// Return an empty list of indices. #[default] ReturnEmpty, - /// If the index is not found, the applier will return a list of all indices. + /// Return the full range of indices. ReturnFullRange, - /// If the index is not found, the applier will throw an error. + /// Throw an error. ThrowError, } @@ -211,7 +219,10 @@ mod tests { _ => unreachable!(), } }); - let indices = applier.apply(&mut mock_reader).await.unwrap(); + let indices = applier + .apply(SearchContext::default(), &mut mock_reader) + .await + .unwrap(); assert_eq!(indices, vec![0, 2, 4, 6]); // An index reader with a single tag "tag-0" but without value "tag-0_value-0" @@ -225,7 +236,10 @@ mod tests { "tag-0" => Ok(FstMap::from_iter([(b"tag-0_value-1", fst_value(2, 1))]).unwrap()), _ => unreachable!(), }); - let indices = applier.apply(&mut mock_reader).await.unwrap(); + let indices = applier + .apply(SearchContext::default(), &mut mock_reader) + .await + .unwrap(); assert!(indices.is_empty()); } @@ -260,7 +274,10 @@ mod tests { } }); - let indices = applier.apply(&mut mock_reader).await.unwrap(); + let indices = applier + .apply(SearchContext::default(), &mut mock_reader) + .await + .unwrap(); assert_eq!(indices, vec![0, 4, 6]); } @@ -276,7 +293,10 @@ mod tests { .expect_metadata() .returning(|| Ok(mock_metas(["tag-0"]))); - let indices = applier.apply(&mut mock_reader).await.unwrap(); + let indices = applier + .apply(SearchContext::default(), &mut mock_reader) + .await + .unwrap(); assert_eq!(indices, vec![0, 1, 2, 3, 4, 5, 6, 7]); // full range to scan } @@ -299,7 +319,10 @@ mod tests { options: ReadOptions::default(), }; - let indices = applier.apply(&mut mock_reader).await.unwrap(); + let indices = applier + .apply(SearchContext::default(), &mut mock_reader) + .await + .unwrap(); assert!(indices.is_empty()); } @@ -317,7 +340,9 @@ mod tests { }, }; - let result = applier.apply(&mut mock_reader).await; + let result = applier + .apply(SearchContext::default(), &mut mock_reader) + .await; assert!(matches!(result, Err(Error::IndexNotFound { .. }))); let applier = PredicatesIndexApplier { @@ -326,7 +351,10 @@ mod tests { index_not_found_strategy: IndexNotFoundStrategy::ReturnEmpty, }, }; - let indices = applier.apply(&mut mock_reader).await.unwrap(); + let indices = applier + .apply(SearchContext::default(), &mut mock_reader) + .await + .unwrap(); assert!(indices.is_empty()); let applier = PredicatesIndexApplier { @@ -335,7 +363,10 @@ mod tests { index_not_found_strategy: IndexNotFoundStrategy::ReturnFullRange, }, }; - let indices = applier.apply(&mut mock_reader).await.unwrap(); + let indices = applier + .apply(SearchContext::default(), &mut mock_reader) + .await + .unwrap(); assert_eq!(indices, vec![0, 1, 2, 3, 4, 5, 6, 7]); } } From ccac231a937a36c20b1548fcdefd19060c9c17f8 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Tue, 5 Dec 2023 07:14:03 +0000 Subject: [PATCH 28/29] feat: move index_not_found_strategy as a field of SearchContext Signed-off-by: Zhenchi --- .../src/inverted_index/search/index_apply.rs | 25 ++++- .../search/index_apply/predicates_apply.rs | 94 +++++++------------ 2 files changed, 55 insertions(+), 64 deletions(-) diff --git a/src/index/src/inverted_index/search/index_apply.rs b/src/index/src/inverted_index/search/index_apply.rs index a4e920308287..4e297c89d2dc 100644 --- a/src/index/src/inverted_index/search/index_apply.rs +++ b/src/index/src/inverted_index/search/index_apply.rs @@ -20,10 +20,6 @@ pub use predicates_apply::PredicatesIndexApplier; use crate::inverted_index::error::Result; use crate::inverted_index::format::reader::InvertedIndexReader; -/// A context for searching the inverted index. -#[derive(Clone, Debug, Default)] -pub struct SearchContext {} - /// A trait for processing and transforming indices obtained from an inverted index. /// /// Applier instances are reusable and work with various `InvertedIndexReader` instances, @@ -38,3 +34,24 @@ pub trait IndexApplier { reader: &mut dyn InvertedIndexReader, ) -> Result>; } + +/// A context for searching the inverted index. +#[derive(Clone, Debug, Eq, PartialEq, Default)] +pub struct SearchContext { + /// `index_not_found_strategy` controls the behavior of the applier when the index is not found. + pub index_not_found_strategy: IndexNotFoundStrategy, +} + +/// Defines the behavior of an applier when the index is not found. +#[derive(Clone, Copy, Debug, Eq, PartialEq, Default)] +pub enum IndexNotFoundStrategy { + /// Return an empty list of indices. + #[default] + ReturnEmpty, + + /// Return the full range of indices. + ReturnFullRange, + + /// Throw an error. + ThrowError, +} diff --git a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs index b95fab5e018c..fd8e3726f810 100644 --- a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs +++ b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs @@ -16,14 +16,15 @@ use async_trait::async_trait; use common_base::BitVec; use greptime_proto::v1::index::InvertedIndexMetas; -use super::SearchContext; use crate::inverted_index::error::{IndexNotFoundSnafu, Result}; use crate::inverted_index::format::reader::InvertedIndexReader; use crate::inverted_index::search::fst_apply::{ FstApplier, IntersectionFstApplier, KeysFstApplier, }; use crate::inverted_index::search::fst_values_mapper::FstValuesMapper; -use crate::inverted_index::search::index_apply::IndexApplier; +use crate::inverted_index::search::index_apply::{ + IndexApplier, IndexNotFoundStrategy, SearchContext, +}; use crate::inverted_index::search::predicate::Predicate; type IndexName = String; @@ -34,9 +35,6 @@ pub struct PredicatesIndexApplier { /// A list of `FstApplier`s, each associated with a specific index name /// (e.g. a tag field uses its column name as index name) fst_appliers: Vec<(IndexName, Box)>, - - /// Options for reading an inverted index (e.g., how to handle missing index). - options: ReadOptions, } #[async_trait] @@ -45,7 +43,7 @@ impl IndexApplier for PredicatesIndexApplier { /// bitmaps obtained for each index to result in a final set of indices. async fn apply( &self, - _context: SearchContext, + context: SearchContext, reader: &mut dyn InvertedIndexReader, ) -> Result> { let metadata = reader.metadata().await?; @@ -58,7 +56,7 @@ impl IndexApplier for PredicatesIndexApplier { } let Some(meta) = metadata.metas.get(name) else { - match self.options.index_not_found_strategy { + match context.index_not_found_strategy { IndexNotFoundStrategy::ReturnEmpty => { return Ok(vec![]); } @@ -87,13 +85,7 @@ impl IndexApplier for PredicatesIndexApplier { impl PredicatesIndexApplier { /// Constructs an instance of `PredicatesIndexApplier` based on a list of tag predicates. /// Chooses an appropriate `FstApplier` for each index name based on the nature of its predicates. - /// - /// * `predicates`: A list of predicates for each index name. - /// * `options`: Options for reading an inverted index (e.g., how to handle missing index). - pub fn try_new( - mut predicates: Vec<(IndexName, Vec)>, - options: ReadOptions, - ) -> Result { + pub fn try_from(mut predicates: Vec<(IndexName, Vec)>) -> Result { let mut fst_appliers = Vec::with_capacity(predicates.len()); // InList predicates are applied first to benefit from higher selectivity. @@ -115,10 +107,7 @@ impl PredicatesIndexApplier { fst_appliers.push((tag_name, fst_applier)); } - Ok(PredicatesIndexApplier { - fst_appliers, - options, - }) + Ok(PredicatesIndexApplier { fst_appliers }) } /// Creates a `BitVec` representing the full range of data in the index for initial scanning. @@ -130,25 +119,11 @@ impl PredicatesIndexApplier { } } -/// Options for reading an inverted index. -#[derive(Clone, Copy, Debug, Eq, PartialEq, Default)] -pub struct ReadOptions { - /// `index_not_found_strategy` controls the behavior of the applier when the index is not found. - pub index_not_found_strategy: IndexNotFoundStrategy, -} - -/// Defines the behavior of an applier when the index is not found. -#[derive(Clone, Copy, Debug, Eq, PartialEq, Default)] -pub enum IndexNotFoundStrategy { - /// Return an empty list of indices. - #[default] - ReturnEmpty, - - /// Return the full range of indices. - ReturnFullRange, - - /// Throw an error. - ThrowError, +impl TryFrom)>> for PredicatesIndexApplier { + type Error = crate::inverted_index::error::Error; + fn try_from(predicates: Vec<(String, Vec)>) -> Result { + Self::try_from(predicates) + } } #[cfg(test)] @@ -199,7 +174,6 @@ mod tests { // An index applier that point-gets "tag-0_value-0" on tag "tag-0" let applier = PredicatesIndexApplier { fst_appliers: vec![(s("tag-0"), key_fst_applier("tag-0_value-0"))], - options: ReadOptions::default(), }; // An index reader with a single tag "tag-0" and a corresponding value "tag-0_value-0" @@ -251,7 +225,6 @@ mod tests { (s("tag-0"), key_fst_applier("tag-0_value-0")), (s("tag-1"), key_fst_applier("tag-1_value-a")), ], - options: ReadOptions::default(), }; // An index reader with two tags "tag-0" and "tag-1" and respective values "tag-0_value-0" and "tag-1_value-a" @@ -285,7 +258,6 @@ mod tests { async fn test_index_applier_without_predicates() { let applier = PredicatesIndexApplier { fst_appliers: vec![], - options: ReadOptions::default(), }; let mut mock_reader: MockInvertedIndexReader = MockInvertedIndexReader::new(); @@ -316,7 +288,6 @@ mod tests { let applier = PredicatesIndexApplier { fst_appliers: vec![(s("tag-0"), Box::new(mock_fst_applier))], - options: ReadOptions::default(), }; let indices = applier @@ -333,38 +304,41 @@ mod tests { .expect_metadata() .returning(|| Ok(mock_metas(vec![]))); + let mut mock_fst_applier = MockFstApplier::new(); + mock_fst_applier.expect_apply().never(); + let applier = PredicatesIndexApplier { - fst_appliers: vec![(s("tag-0"), key_fst_applier("tag-0_value-0"))], - options: ReadOptions { - index_not_found_strategy: IndexNotFoundStrategy::ThrowError, - }, + fst_appliers: vec![(s("tag-0"), Box::new(mock_fst_applier))], }; let result = applier - .apply(SearchContext::default(), &mut mock_reader) + .apply( + SearchContext { + index_not_found_strategy: IndexNotFoundStrategy::ThrowError, + }, + &mut mock_reader, + ) .await; assert!(matches!(result, Err(Error::IndexNotFound { .. }))); - let applier = PredicatesIndexApplier { - fst_appliers: vec![(s("tag-0"), key_fst_applier("tag-0_value-0"))], - options: ReadOptions { - index_not_found_strategy: IndexNotFoundStrategy::ReturnEmpty, - }, - }; let indices = applier - .apply(SearchContext::default(), &mut mock_reader) + .apply( + SearchContext { + index_not_found_strategy: IndexNotFoundStrategy::ReturnEmpty, + }, + &mut mock_reader, + ) .await .unwrap(); assert!(indices.is_empty()); - let applier = PredicatesIndexApplier { - fst_appliers: vec![(s("tag-0"), key_fst_applier("tag-0_value-0"))], - options: ReadOptions { - index_not_found_strategy: IndexNotFoundStrategy::ReturnFullRange, - }, - }; let indices = applier - .apply(SearchContext::default(), &mut mock_reader) + .apply( + SearchContext { + index_not_found_strategy: IndexNotFoundStrategy::ReturnFullRange, + }, + &mut mock_reader, + ) .await .unwrap(); assert_eq!(indices, vec![0, 1, 2, 3, 4, 5, 6, 7]); From c05c99e0a45507daa82842fcce2e39e7b67c8585 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Tue, 5 Dec 2023 08:07:09 +0000 Subject: [PATCH 29/29] chore: rename varient Signed-off-by: Zhenchi --- src/index/src/inverted_index/search/index_apply.rs | 4 ++-- .../src/inverted_index/search/index_apply/predicates_apply.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/index/src/inverted_index/search/index_apply.rs b/src/index/src/inverted_index/search/index_apply.rs index 4e297c89d2dc..35d8c387a2d6 100644 --- a/src/index/src/inverted_index/search/index_apply.rs +++ b/src/index/src/inverted_index/search/index_apply.rs @@ -49,8 +49,8 @@ pub enum IndexNotFoundStrategy { #[default] ReturnEmpty, - /// Return the full range of indices. - ReturnFullRange, + /// Ignore the index and continue. + Ignore, /// Throw an error. ThrowError, diff --git a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs index fd8e3726f810..e2bea2756a7f 100644 --- a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs +++ b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs @@ -60,7 +60,7 @@ impl IndexApplier for PredicatesIndexApplier { IndexNotFoundStrategy::ReturnEmpty => { return Ok(vec![]); } - IndexNotFoundStrategy::ReturnFullRange => { + IndexNotFoundStrategy::Ignore => { continue; } IndexNotFoundStrategy::ThrowError => { @@ -335,7 +335,7 @@ mod tests { let indices = applier .apply( SearchContext { - index_not_found_strategy: IndexNotFoundStrategy::ReturnFullRange, + index_not_found_strategy: IndexNotFoundStrategy::Ignore, }, &mut mock_reader, )