From 30e8a21ec8bdf3045834dfda4a3c598832a88026 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Thu, 21 Nov 2024 17:39:30 +0000 Subject: [PATCH] StatsCompute VTable (#1434) Moves stats compute into a VTable on the encoding. --- encodings/alp/src/alp/array.rs | 4 +- encodings/alp/src/alp_rd/array.rs | 4 +- encodings/bytebool/src/stats.rs | 20 +++--- encodings/datetime-parts/src/array.rs | 8 +-- encodings/dict/src/stats.rs | 24 ++++---- encodings/fastlanes/src/bitpacking/mod.rs | 4 +- encodings/fastlanes/src/delta/mod.rs | 4 +- encodings/fastlanes/src/for/mod.rs | 4 +- encodings/fsst/src/array.rs | 4 +- encodings/roaring/src/boolean/stats.rs | 19 +++--- encodings/roaring/src/integer/mod.rs | 8 +-- encodings/runend-bool/src/array.rs | 21 +++---- encodings/runend/src/array.rs | 15 ++--- encodings/zigzag/src/array.rs | 22 +++---- vortex-array/src/array/bool/stats.rs | 59 +++++++++--------- vortex-array/src/array/chunked/stats.rs | 9 +-- vortex-array/src/array/constant/mod.rs | 8 +-- vortex-array/src/array/extension/mod.rs | 10 +-- vortex-array/src/array/null/mod.rs | 10 +-- vortex-array/src/array/primitive/stats.rs | 71 ++++++++++++---------- vortex-array/src/array/sparse/mod.rs | 20 +++--- vortex-array/src/array/struct_/mod.rs | 10 +-- vortex-array/src/array/varbin/stats.rs | 13 ++-- vortex-array/src/array/varbinview/stats.rs | 13 ++-- vortex-array/src/data/owned.rs | 5 +- vortex-array/src/data/viewed.rs | 4 +- vortex-array/src/encoding/mod.rs | 10 ++- vortex-array/src/encoding/opaque.rs | 3 + vortex-array/src/lib.rs | 3 +- vortex-array/src/stats/mod.rs | 24 +++++++- 30 files changed, 240 insertions(+), 193 deletions(-) diff --git a/encodings/alp/src/alp/array.rs b/encodings/alp/src/alp/array.rs index b4fed1287d..2a71040264 100644 --- a/encodings/alp/src/alp/array.rs +++ b/encodings/alp/src/alp/array.rs @@ -6,7 +6,7 @@ use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex_array::array::PrimitiveArray; use vortex_array::encoding::ids; use vortex_array::iter::{Accessor, AccessorRef}; -use vortex_array::stats::ArrayStatisticsCompute; +use vortex_array::stats::StatisticsVTable; use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity}; use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait}; use vortex_array::{ @@ -278,4 +278,4 @@ impl AcceptArrayVisitor for ALPArray { } } -impl ArrayStatisticsCompute for ALPArray {} +impl StatisticsVTable for ALPEncoding {} diff --git a/encodings/alp/src/alp_rd/array.rs b/encodings/alp/src/alp_rd/array.rs index f7c3f8887c..5e4e2ec0cc 100644 --- a/encodings/alp/src/alp_rd/array.rs +++ b/encodings/alp/src/alp_rd/array.rs @@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize}; use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex_array::array::{PrimitiveArray, SparseArray}; use vortex_array::encoding::ids; -use vortex_array::stats::{ArrayStatisticsCompute, StatsSet}; +use vortex_array::stats::{StatisticsVTable, StatsSet}; use vortex_array::validity::{ArrayValidity, LogicalValidity}; use vortex_array::{ impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical, @@ -259,7 +259,7 @@ impl AcceptArrayVisitor for ALPRDArray { } } -impl ArrayStatisticsCompute for ALPRDArray {} +impl StatisticsVTable for ALPRDEncoding {} impl ArrayTrait for ALPRDArray {} diff --git a/encodings/bytebool/src/stats.rs b/encodings/bytebool/src/stats.rs index 95342924e1..639361f2c7 100644 --- a/encodings/bytebool/src/stats.rs +++ b/encodings/bytebool/src/stats.rs @@ -1,18 +1,24 @@ -use vortex_array::stats::{ArrayStatisticsCompute, Stat, StatsSet}; +use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet}; use vortex_array::{ArrayLen, IntoArrayVariant}; use vortex_error::VortexResult; -use super::ByteBoolArray; +use super::{ByteBoolArray, ByteBoolEncoding}; -impl ArrayStatisticsCompute for ByteBoolArray { - fn compute_statistics(&self, stat: Stat) -> VortexResult { - if self.is_empty() { +impl StatisticsVTable for ByteBoolEncoding { + fn compute_statistics(&self, array: &ByteBoolArray, stat: Stat) -> VortexResult { + if array.is_empty() { return Ok(StatsSet::default()); } // TODO(adamgs): This is slightly wasteful and could be optimized in the future - let bools = self.as_ref().clone().into_bool()?; - bools.compute_statistics(stat) + let bools = array.as_ref().clone().into_bool()?; + Ok(StatsSet::from_iter( + bools + .statistics() + .compute(stat) + .into_iter() + .map(|value| (stat, value)), + )) } } diff --git a/encodings/datetime-parts/src/array.rs b/encodings/datetime-parts/src/array.rs index d1334b7481..6a65aa866e 100644 --- a/encodings/datetime-parts/src/array.rs +++ b/encodings/datetime-parts/src/array.rs @@ -5,7 +5,7 @@ use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex_array::array::StructArray; use vortex_array::compute::unary::try_cast; use vortex_array::encoding::ids; -use vortex_array::stats::{ArrayStatisticsCompute, Stat, StatsSet}; +use vortex_array::stats::{Stat, StatisticsVTable, StatsSet}; use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity}; use vortex_array::variants::{ArrayVariants, ExtensionArrayTrait}; use vortex_array::{ @@ -162,10 +162,10 @@ impl AcceptArrayVisitor for DateTimePartsArray { } } -impl ArrayStatisticsCompute for DateTimePartsArray { - fn compute_statistics(&self, stat: Stat) -> VortexResult { +impl StatisticsVTable for DateTimePartsEncoding { + fn compute_statistics(&self, array: &DateTimePartsArray, stat: Stat) -> VortexResult { let maybe_stat = match stat { - Stat::NullCount => Some(Scalar::from(self.validity().null_count(self.len())?)), + Stat::NullCount => Some(Scalar::from(array.validity().null_count(array.len())?)), _ => None, }; diff --git a/encodings/dict/src/stats.rs b/encodings/dict/src/stats.rs index 4dcfd4000d..faa42ec1ed 100644 --- a/encodings/dict/src/stats.rs +++ b/encodings/dict/src/stats.rs @@ -1,54 +1,54 @@ -use vortex_array::stats::{ArrayStatistics, ArrayStatisticsCompute, Stat, StatsSet}; +use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet}; use vortex_error::VortexResult; -use crate::DictArray; +use crate::{DictArray, DictEncoding}; -impl ArrayStatisticsCompute for DictArray { - fn compute_statistics(&self, stat: Stat) -> VortexResult { +impl StatisticsVTable for DictEncoding { + fn compute_statistics(&self, array: &DictArray, stat: Stat) -> VortexResult { let mut stats = StatsSet::default(); match stat { Stat::RunCount => { - if let Some(rc) = self.codes().statistics().compute(Stat::RunCount) { + if let Some(rc) = array.codes().statistics().compute(Stat::RunCount) { stats.set(Stat::RunCount, rc); } } Stat::Min => { - if let Some(min) = self.values().statistics().compute(Stat::Min) { + if let Some(min) = array.values().statistics().compute(Stat::Min) { stats.set(Stat::Min, min); } } Stat::Max => { - if let Some(max) = self.values().statistics().compute(Stat::Max) { + if let Some(max) = array.values().statistics().compute(Stat::Max) { stats.set(Stat::Max, max); } } Stat::IsConstant => { - if let Some(is_constant) = self.codes().statistics().compute(Stat::IsConstant) { + if let Some(is_constant) = array.codes().statistics().compute(Stat::IsConstant) { stats.set(Stat::IsConstant, is_constant); } } Stat::NullCount => { - if let Some(null_count) = self.codes().statistics().compute(Stat::NullCount) { + if let Some(null_count) = array.codes().statistics().compute(Stat::NullCount) { stats.set(Stat::NullCount, null_count); } } Stat::IsSorted | Stat::IsStrictSorted => { // if dictionary is sorted - if self + if array .values() .statistics() .compute_is_sorted() .unwrap_or(false) { if let Some(codes_are_sorted) = - self.codes().statistics().compute(Stat::IsSorted) + array.codes().statistics().compute(Stat::IsSorted) { stats.set(Stat::IsSorted, codes_are_sorted); } if let Some(codes_are_strict_sorted) = - self.codes().statistics().compute(Stat::IsStrictSorted) + array.codes().statistics().compute(Stat::IsStrictSorted) { stats.set(Stat::IsStrictSorted, codes_are_strict_sorted); } diff --git a/encodings/fastlanes/src/bitpacking/mod.rs b/encodings/fastlanes/src/bitpacking/mod.rs index 3153f0d0e1..cc6964c693 100644 --- a/encodings/fastlanes/src/bitpacking/mod.rs +++ b/encodings/fastlanes/src/bitpacking/mod.rs @@ -7,7 +7,7 @@ use fastlanes::BitPacking; use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex_array::array::{PrimitiveArray, SparseArray}; use vortex_array::encoding::ids; -use vortex_array::stats::{ArrayStatisticsCompute, StatsSet}; +use vortex_array::stats::{StatisticsVTable, StatsSet}; use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata}; use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait}; use vortex_array::{ @@ -228,7 +228,7 @@ impl AcceptArrayVisitor for BitPackedArray { } } -impl ArrayStatisticsCompute for BitPackedArray {} +impl StatisticsVTable for BitPackedEncoding {} impl ArrayTrait for BitPackedArray { fn nbytes(&self) -> usize { diff --git a/encodings/fastlanes/src/delta/mod.rs b/encodings/fastlanes/src/delta/mod.rs index 030065b714..1305a007df 100644 --- a/encodings/fastlanes/src/delta/mod.rs +++ b/encodings/fastlanes/src/delta/mod.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex_array::array::PrimitiveArray; use vortex_array::encoding::ids; -use vortex_array::stats::{ArrayStatisticsCompute, StatsSet}; +use vortex_array::stats::{StatisticsVTable, StatsSet}; use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata}; use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait}; use vortex_array::{ @@ -249,4 +249,4 @@ impl AcceptArrayVisitor for DeltaArray { } } -impl ArrayStatisticsCompute for DeltaArray {} +impl StatisticsVTable for DeltaEncoding {} diff --git a/encodings/fastlanes/src/for/mod.rs b/encodings/fastlanes/src/for/mod.rs index 98d7af5b13..65e7d7d972 100644 --- a/encodings/fastlanes/src/for/mod.rs +++ b/encodings/fastlanes/src/for/mod.rs @@ -4,7 +4,7 @@ pub use compress::*; use serde::{Deserialize, Serialize}; use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex_array::encoding::ids; -use vortex_array::stats::{ArrayStatisticsCompute, StatsSet}; +use vortex_array::stats::{StatisticsVTable, StatsSet}; use vortex_array::validity::{ArrayValidity, LogicalValidity}; use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait}; use vortex_array::{ @@ -105,7 +105,7 @@ impl AcceptArrayVisitor for FoRArray { } } -impl ArrayStatisticsCompute for FoRArray {} +impl StatisticsVTable for FoREncoding {} impl ArrayTrait for FoRArray { fn nbytes(&self) -> usize { diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs index 64f7e6dedd..32dd160e40 100644 --- a/encodings/fsst/src/array.rs +++ b/encodings/fsst/src/array.rs @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize}; use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex_array::array::{VarBin, VarBinArray}; use vortex_array::encoding::ids; -use vortex_array::stats::{ArrayStatisticsCompute, StatsSet}; +use vortex_array::stats::{StatisticsVTable, StatsSet}; use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity}; use vortex_array::variants::{ArrayVariants, BinaryArrayTrait, Utf8ArrayTrait}; use vortex_array::{ @@ -200,7 +200,7 @@ impl AcceptArrayVisitor for FSSTArray { } } -impl ArrayStatisticsCompute for FSSTArray {} +impl StatisticsVTable for FSSTEncoding {} impl ArrayValidity for FSSTArray { fn is_valid(&self, index: usize) -> bool { diff --git a/encodings/roaring/src/boolean/stats.rs b/encodings/roaring/src/boolean/stats.rs index 0893ab5782..9d77a8b89a 100644 --- a/encodings/roaring/src/boolean/stats.rs +++ b/encodings/roaring/src/boolean/stats.rs @@ -1,13 +1,13 @@ -use vortex_array::stats::{ArrayStatisticsCompute, Stat, StatsSet}; +use vortex_array::stats::{Stat, StatisticsVTable, StatsSet}; use vortex_array::ArrayLen; use vortex_error::{vortex_err, VortexResult}; -use crate::RoaringBoolArray; +use crate::{RoaringBoolArray, RoaringBoolEncoding}; -impl ArrayStatisticsCompute for RoaringBoolArray { - fn compute_statistics(&self, stat: Stat) -> VortexResult { +impl StatisticsVTable for RoaringBoolEncoding { + fn compute_statistics(&self, array: &RoaringBoolArray, stat: Stat) -> VortexResult { // Only needs to compute IsSorted, IsStrictSorted and RunCount all other stats have been populated on construction - let bitmap = self.bitmap(); + let bitmap = array.bitmap(); let true_count = bitmap.statistics().cardinality; if matches!( stat, @@ -16,12 +16,12 @@ impl ArrayStatisticsCompute for RoaringBoolArray { return Ok(StatsSet::bools_with_true_and_null_count( true_count as usize, 0, - self.len(), + array.len(), )); } if matches!(stat, Stat::IsSorted | Stat::IsStrictSorted) { - let is_sorted = if true_count == 0 || true_count == self.len() as u64 { + let is_sorted = if true_count == 0 || true_count == array.len() as u64 { true } else { let min_idx = bitmap.minimum().ok_or_else(|| { @@ -30,11 +30,12 @@ impl ArrayStatisticsCompute for RoaringBoolArray { let max_idx = bitmap.maximum().ok_or_else(|| { vortex_err!("Bitmap has no maximum despite having cardinality > 0") })?; - (max_idx as usize + 1 == self.len()) && (max_idx + 1 - min_idx) as u64 == true_count + (max_idx as usize + 1 == array.len()) + && (max_idx + 1 - min_idx) as u64 == true_count }; let is_strict_sorted = - is_sorted && (self.len() <= 1 || (self.len() == 2 && true_count == 1)); + is_sorted && (array.len() <= 1 || (array.len() == 2 && true_count == 1)); return Ok(StatsSet::from_iter([ (Stat::IsSorted, is_sorted.into()), (Stat::IsStrictSorted, is_strict_sorted.into()), diff --git a/encodings/roaring/src/integer/mod.rs b/encodings/roaring/src/integer/mod.rs index 306cecddb2..eadc5c1d31 100644 --- a/encodings/roaring/src/integer/mod.rs +++ b/encodings/roaring/src/integer/mod.rs @@ -8,7 +8,7 @@ use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex_array::array::PrimitiveArray; use vortex_array::compute::unary::try_cast; use vortex_array::encoding::ids; -use vortex_array::stats::{ArrayStatistics, ArrayStatisticsCompute, Stat, StatsSet}; +use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet}; use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity}; use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait}; use vortex_array::{ @@ -137,12 +137,12 @@ impl AcceptArrayVisitor for RoaringIntArray { } } -impl ArrayStatisticsCompute for RoaringIntArray { - fn compute_statistics(&self, stat: Stat) -> VortexResult { +impl StatisticsVTable for RoaringIntEncoding { + fn compute_statistics(&self, array: &RoaringIntArray, stat: Stat) -> VortexResult { // possibly faster to write an accumulator over the iterator, though not necessarily if stat == Stat::TrailingZeroFreq || stat == Stat::BitWidthFreq || stat == Stat::RunCount { let primitive = - PrimitiveArray::from_vec(self.owned_bitmap().to_vec(), Validity::NonNullable); + PrimitiveArray::from_vec(array.owned_bitmap().to_vec(), Validity::NonNullable); primitive.statistics().compute_all(&[ Stat::TrailingZeroFreq, Stat::BitWidthFreq, diff --git a/encodings/runend-bool/src/array.rs b/encodings/runend-bool/src/array.rs index 8cfb8bb4ef..77661132be 100644 --- a/encodings/runend-bool/src/array.rs +++ b/encodings/runend-bool/src/array.rs @@ -6,7 +6,7 @@ use vortex_array::array::{BoolArray, PrimitiveArray}; use vortex_array::compute::unary::scalar_at; use vortex_array::compute::{search_sorted, SearchSortedSide}; use vortex_array::encoding::ids; -use vortex_array::stats::{ArrayStatistics, ArrayStatisticsCompute, Stat, StatsSet}; +use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet}; use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata}; use vortex_array::variants::{ArrayVariants, BoolArrayTrait, PrimitiveArrayTrait}; use vortex_array::{ @@ -229,17 +229,17 @@ impl AcceptArrayVisitor for RunEndBoolArray { } } -impl ArrayStatisticsCompute for RunEndBoolArray { - fn compute_statistics(&self, stat: Stat) -> VortexResult { +impl StatisticsVTable for RunEndBoolEncoding { + fn compute_statistics(&self, array: &RunEndBoolArray, stat: Stat) -> VortexResult { let maybe_scalar: Option = match stat { - Stat::NullCount => Some(self.validity().null_count(self.len())?.into()), + Stat::NullCount => Some(array.validity().null_count(array.len())?.into()), Stat::TrueCount => { - let pends = self.ends().into_primitive()?; + let pends = array.ends().into_primitive()?; let mut true_count: usize = 0; let mut prev_end: usize = 0; - let mut include = self.start(); + let mut include = array.start(); match_each_unsigned_integer_ptype!(pends.ptype(), |$P| { - for end in trimmed_ends_iter(pends.maybe_null_slice::<$P>(), self.offset(), self.len()) { + for end in trimmed_ends_iter(pends.maybe_null_slice::<$P>(), array.offset(), array.len()) { if include { true_count += end - prev_end; } @@ -268,7 +268,7 @@ mod test { use vortex_array::array::{BoolArray, PrimitiveArray}; use vortex_array::compute::unary::scalar_at; use vortex_array::compute::{slice, take, TakeOptions}; - use vortex_array::stats::{ArrayStatistics as _, ArrayStatisticsCompute}; + use vortex_array::stats::ArrayStatistics; use vortex_array::validity::Validity; use vortex_array::{ ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoCanonical, ToArrayData, @@ -406,10 +406,9 @@ mod test { Stat::IsStrictSorted, ] { // call compute_statistics directly to avoid caching - let bools_stats = bools.compute_statistics(stat).unwrap(); - let expected = bools_stats.get(stat).unwrap(); + let expected = bools.statistics().compute(stat).unwrap(); let actual = arr.statistics().compute(stat).unwrap(); - assert_eq!(expected, &actual); + assert_eq!(expected, actual); } assert_eq!(arr.statistics().compute_run_count(), Some(ends_len)); diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index 76ee3c416f..447e06cdde 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -6,7 +6,7 @@ use vortex_array::array::PrimitiveArray; use vortex_array::compute::unary::scalar_at; use vortex_array::compute::{search_sorted, search_sorted_usize_many, SearchSortedSide}; use vortex_array::encoding::ids; -use vortex_array::stats::{ArrayStatistics, ArrayStatisticsCompute, Stat, StatsSet}; +use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet}; use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata}; use vortex_array::variants::{ArrayVariants, BoolArrayTrait, PrimitiveArrayTrait}; use vortex_array::{ @@ -248,17 +248,18 @@ impl AcceptArrayVisitor for RunEndArray { } } -impl ArrayStatisticsCompute for RunEndArray { - fn compute_statistics(&self, stat: Stat) -> VortexResult { +impl StatisticsVTable for RunEndEncoding { + fn compute_statistics(&self, array: &RunEndArray, stat: Stat) -> VortexResult { let maybe_stat = match stat { - Stat::Min | Stat::Max => self.values().statistics().compute(stat), - Stat::NullCount => Some(Scalar::from(self.validity().null_count(self.len())?)), + Stat::Min | Stat::Max => array.values().statistics().compute(stat), + Stat::NullCount => Some(Scalar::from(array.validity().null_count(array.len())?)), Stat::IsSorted => Some(Scalar::from( - self.values() + array + .values() .statistics() .compute_is_sorted() .unwrap_or(false) - && self.logical_validity().all_valid(), + && array.logical_validity().all_valid(), )), _ => None, }; diff --git a/encodings/zigzag/src/array.rs b/encodings/zigzag/src/array.rs index e951bef2fe..443b572a8c 100644 --- a/encodings/zigzag/src/array.rs +++ b/encodings/zigzag/src/array.rs @@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize}; use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex_array::array::PrimitiveArray; use vortex_array::encoding::ids; -use vortex_array::stats::{ArrayStatistics, ArrayStatisticsCompute, Stat, StatsSet}; +use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet}; use vortex_array::validity::{ArrayValidity, LogicalValidity}; use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait}; use vortex_array::{ @@ -95,17 +95,17 @@ impl AcceptArrayVisitor for ZigZagArray { } } -impl ArrayStatisticsCompute for ZigZagArray { - fn compute_statistics(&self, stat: Stat) -> VortexResult { +impl StatisticsVTable for ZigZagEncoding { + fn compute_statistics(&self, array: &ZigZagArray, stat: Stat) -> VortexResult { let mut stats = StatsSet::default(); - // these stats are the same for self and self.encoded() + // these stats are the same for array and array.encoded() if matches!(stat, Stat::IsConstant | Stat::NullCount) { - if let Some(val) = self.encoded().statistics().compute(stat) { + if let Some(val) = array.encoded().statistics().compute(stat) { stats.set(stat, val); } } else if matches!(stat, Stat::Min | Stat::Max) { - let encoded_max = self + let encoded_max = array .encoded() .statistics() .compute_as_cast::(Stat::Max); @@ -113,7 +113,7 @@ impl ArrayStatisticsCompute for ZigZagArray { // the max of the encoded array is the element with the highest absolute value (so either min if negative, or max if positive) let decoded = ::decode(val); let decoded_stat = if decoded < 0 { Stat::Min } else { Stat::Max }; - stats.set(decoded_stat, Scalar::from(decoded).cast(self.dtype())?); + stats.set(decoded_stat, Scalar::from(decoded).cast(array.dtype())?); } } @@ -142,8 +142,8 @@ mod test { let zigzag = ZigZagArray::encode(&array).unwrap(); for stat in [Stat::Max, Stat::NullCount, Stat::IsConstant] { - let stats = zigzag.compute_statistics(stat).unwrap(); - assert_eq!(stats.get(stat), array.statistics().compute(stat).as_ref()); + let value = zigzag.statistics().compute(stat); + assert_eq!(value, array.statistics().compute(stat)); } let sliced = ZigZagArray::try_from(slice(zigzag, 0, 2).unwrap()).unwrap(); @@ -152,8 +152,8 @@ mod test { Scalar::from(-5i32) ); for stat in [Stat::Min, Stat::NullCount, Stat::IsConstant] { - let stats = sliced.compute_statistics(stat).unwrap(); - assert_eq!(stats.get(stat), array.statistics().compute(stat).as_ref()); + let value = sliced.statistics().compute(stat); + assert_eq!(value, array.statistics().compute(stat)); } } } diff --git a/vortex-array/src/array/bool/stats.rs b/vortex-array/src/array/bool/stats.rs index afa1fa30f8..ce251e6801 100644 --- a/vortex-array/src/array/bool/stats.rs +++ b/vortex-array/src/array/bool/stats.rs @@ -5,18 +5,18 @@ use itertools::Itertools; use vortex_dtype::{DType, Nullability}; use vortex_error::VortexResult; -use crate::array::BoolArray; -use crate::stats::{ArrayStatisticsCompute, Stat, StatsSet}; +use crate::array::{BoolArray, BoolEncoding}; +use crate::stats::{Stat, StatisticsVTable, StatsSet}; use crate::validity::{ArrayValidity, LogicalValidity}; use crate::{ArrayDType, ArrayLen, ArrayTrait as _, IntoArrayVariant}; -impl ArrayStatisticsCompute for BoolArray { - fn compute_statistics(&self, stat: Stat) -> VortexResult { +impl StatisticsVTable for BoolEncoding { + fn compute_statistics(&self, array: &BoolArray, stat: Stat) -> VortexResult { if stat == Stat::UncompressedSizeInBytes { - return Ok(StatsSet::of(stat, self.nbytes())); + return Ok(StatsSet::of(stat, array.nbytes())); } - if self.is_empty() { + if array.is_empty() { return Ok(StatsSet::from_iter([ (Stat::TrueCount, 0.into()), (Stat::NullCount, 0.into()), @@ -24,34 +24,34 @@ impl ArrayStatisticsCompute for BoolArray { ])); } - match self.logical_validity() { - LogicalValidity::AllValid(_) => self.boolean_buffer().compute_statistics(stat), - LogicalValidity::AllInvalid(v) => Ok(StatsSet::nulls(v, self.dtype())), - LogicalValidity::Array(a) => { - NullableBools(&self.boolean_buffer(), &a.into_bool()?.boolean_buffer()) - .compute_statistics(stat) - } + match array.logical_validity() { + LogicalValidity::AllValid(_) => self.compute_statistics(&array.boolean_buffer(), stat), + LogicalValidity::AllInvalid(v) => Ok(StatsSet::nulls(v, array.dtype())), + LogicalValidity::Array(a) => self.compute_statistics( + &NullableBools(&array.boolean_buffer(), &a.into_bool()?.boolean_buffer()), + stat, + ), } } } struct NullableBools<'a>(&'a BooleanBuffer, &'a BooleanBuffer); -impl ArrayStatisticsCompute for NullableBools<'_> { - fn compute_statistics(&self, stat: Stat) -> VortexResult { +impl StatisticsVTable> for BoolEncoding { + fn compute_statistics(&self, array: &NullableBools<'_>, stat: Stat) -> VortexResult { // Fast-path if we just want the true-count if matches!( stat, Stat::TrueCount | Stat::Min | Stat::Max | Stat::IsConstant ) { return Ok(StatsSet::bools_with_true_and_null_count( - self.0.bitand(self.1).count_set_bits(), - self.1.count_set_bits(), - self.0.len(), + array.0.bitand(array.1).count_set_bits(), + array.1.count_set_bits(), + array.0.len(), )); } - let first_non_null_idx = self + let first_non_null_idx = array .1 .iter() .enumerate() @@ -60,40 +60,41 @@ impl ArrayStatisticsCompute for NullableBools<'_> { .next(); if let Some(first_non_null) = first_non_null_idx { - let mut acc = BoolStatsAccumulator::new(self.0.value(first_non_null)); + let mut acc = BoolStatsAccumulator::new(array.0.value(first_non_null)); acc.n_nulls(first_non_null); - self.0 + array + .0 .iter() - .zip_eq(self.1.iter()) + .zip_eq(array.1.iter()) .skip(first_non_null + 1) .map(|(next, valid)| valid.then_some(next)) .for_each(|next| acc.nullable_next(next)); Ok(acc.finish()) } else { Ok(StatsSet::nulls( - self.0.len(), + array.0.len(), &DType::Bool(Nullability::Nullable), )) } } } -impl ArrayStatisticsCompute for BooleanBuffer { - fn compute_statistics(&self, stat: Stat) -> VortexResult { +impl StatisticsVTable for BoolEncoding { + fn compute_statistics(&self, buffer: &BooleanBuffer, stat: Stat) -> VortexResult { // Fast-path if we just want the true-count if matches!( stat, Stat::TrueCount | Stat::Min | Stat::Max | Stat::IsConstant ) { return Ok(StatsSet::bools_with_true_and_null_count( - self.count_set_bits(), + buffer.count_set_bits(), 0, - self.len(), + buffer.len(), )); } - let mut stats = BoolStatsAccumulator::new(self.value(0)); - self.iter().skip(1).for_each(|next| stats.next(next)); + let mut stats = BoolStatsAccumulator::new(buffer.value(0)); + buffer.iter().skip(1).for_each(|next| stats.next(next)); Ok(stats.finish()) } } diff --git a/vortex-array/src/array/chunked/stats.rs b/vortex-array/src/array/chunked/stats.rs index a605c4b210..e54576c06a 100644 --- a/vortex-array/src/array/chunked/stats.rs +++ b/vortex-array/src/array/chunked/stats.rs @@ -1,13 +1,14 @@ use vortex_error::VortexResult; use crate::array::chunked::ChunkedArray; -use crate::stats::{ArrayStatistics, ArrayStatisticsCompute, Stat, StatsSet}; +use crate::array::ChunkedEncoding; +use crate::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet}; -impl ArrayStatisticsCompute for ChunkedArray { - fn compute_statistics(&self, stat: Stat) -> VortexResult { +impl StatisticsVTable for ChunkedEncoding { + fn compute_statistics(&self, array: &ChunkedArray, stat: Stat) -> VortexResult { // for UncompressedSizeInBytes, we end up with sum of chunk uncompressed sizes // this ignores the `chunk_offsets` array child, so it won't exactly match self.nbytes() - Ok(self + Ok(array .chunks() .map(|c| { let s = c.statistics(); diff --git a/vortex-array/src/array/constant/mod.rs b/vortex-array/src/array/constant/mod.rs index 71b4432699..1abe1ee372 100644 --- a/vortex-array/src/array/constant/mod.rs +++ b/vortex-array/src/array/constant/mod.rs @@ -6,7 +6,7 @@ use vortex_scalar::{Scalar, ScalarValue}; use crate::array::visitor::{AcceptArrayVisitor, ArrayVisitor}; use crate::encoding::ids; -use crate::stats::{ArrayStatisticsCompute, Stat, StatsSet}; +use crate::stats::{Stat, StatisticsVTable, StatsSet}; use crate::validity::{ArrayValidity, LogicalValidity}; use crate::{impl_encoding, ArrayDType, ArrayLen, ArrayTrait}; @@ -81,9 +81,9 @@ impl ArrayValidity for ConstantArray { } } -impl ArrayStatisticsCompute for ConstantArray { - fn compute_statistics(&self, _stat: Stat) -> VortexResult { - Ok(StatsSet::constant(self.owned_scalar(), self.len())) +impl StatisticsVTable for ConstantEncoding { + fn compute_statistics(&self, array: &ConstantArray, _stat: Stat) -> VortexResult { + Ok(StatsSet::constant(array.owned_scalar(), array.len())) } } diff --git a/vortex-array/src/array/extension/mod.rs b/vortex-array/src/array/extension/mod.rs index 76f42810be..261b6752d6 100644 --- a/vortex-array/src/array/extension/mod.rs +++ b/vortex-array/src/array/extension/mod.rs @@ -8,7 +8,7 @@ use vortex_error::{VortexExpect as _, VortexResult}; use crate::array::visitor::{AcceptArrayVisitor, ArrayVisitor}; use crate::encoding::ids; -use crate::stats::{ArrayStatistics as _, ArrayStatisticsCompute, Stat, StatsSet}; +use crate::stats::{ArrayStatistics as _, Stat, StatisticsVTable, StatsSet}; use crate::validity::{ArrayValidity, LogicalValidity}; use crate::variants::{ArrayVariants, ExtensionArrayTrait}; use crate::{impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical}; @@ -93,15 +93,15 @@ impl AcceptArrayVisitor for ExtensionArray { } } -impl ArrayStatisticsCompute for ExtensionArray { - fn compute_statistics(&self, stat: Stat) -> VortexResult { - let mut stats = self.storage().statistics().compute_all(&[stat])?; +impl StatisticsVTable for ExtensionEncoding { + fn compute_statistics(&self, array: &ExtensionArray, stat: Stat) -> VortexResult { + let mut stats = array.storage().statistics().compute_all(&[stat])?; // for e.g., min/max, we want to cast to the extension array's dtype // for other stats, we don't need to change anything for stat in all::().filter(|s| s.has_same_dtype_as_array()) { if let Some(value) = stats.get(stat) { - stats.set(stat, value.cast(self.dtype())?); + stats.set(stat, value.cast(array.dtype())?); } } diff --git a/vortex-array/src/array/null/mod.rs b/vortex-array/src/array/null/mod.rs index 43ef1c38bf..9fb36ac4b4 100644 --- a/vortex-array/src/array/null/mod.rs +++ b/vortex-array/src/array/null/mod.rs @@ -6,7 +6,7 @@ use vortex_error::{VortexExpect as _, VortexResult}; use crate::array::visitor::{AcceptArrayVisitor, ArrayVisitor}; use crate::encoding::ids; -use crate::stats::{ArrayStatisticsCompute, Stat, StatsSet}; +use crate::stats::{Stat, StatisticsVTable, StatsSet}; use crate::validity::{ArrayValidity, LogicalValidity, Validity}; use crate::variants::{ArrayVariants, NullArrayTrait}; use crate::{impl_encoding, ArrayLen, ArrayTrait, Canonical, IntoCanonical}; @@ -53,13 +53,13 @@ impl ArrayValidity for NullArray { } } -impl ArrayStatisticsCompute for NullArray { - fn compute_statistics(&self, stat: Stat) -> VortexResult { +impl StatisticsVTable for NullEncoding { + fn compute_statistics(&self, array: &NullArray, stat: Stat) -> VortexResult { if stat == Stat::UncompressedSizeInBytes { - return Ok(StatsSet::of(stat, self.nbytes())); + return Ok(StatsSet::of(stat, array.nbytes())); } - Ok(StatsSet::nulls(self.len(), &DType::Null)) + Ok(StatsSet::nulls(array.len(), &DType::Null)) } } diff --git a/vortex-array/src/array/primitive/stats.rs b/vortex-array/src/array/primitive/stats.rs index 98c1494f89..ad85e61ff9 100644 --- a/vortex-array/src/array/primitive/stats.rs +++ b/vortex-array/src/array/primitive/stats.rs @@ -11,7 +11,8 @@ use vortex_error::{vortex_panic, VortexResult}; use vortex_scalar::Scalar; use crate::array::primitive::PrimitiveArray; -use crate::stats::{ArrayStatisticsCompute, Stat, StatsSet}; +use crate::array::PrimitiveEncoding; +use crate::stats::{Stat, StatisticsVTable, StatsSet}; use crate::validity::{ArrayValidity, LogicalValidity}; use crate::variants::PrimitiveArrayTrait; use crate::{ArrayDType, ArrayTrait as _, IntoArrayVariant}; @@ -20,43 +21,45 @@ trait PStatsType: NativePType + Into + BitWidth {} impl + BitWidth> PStatsType for T {} -impl ArrayStatisticsCompute for PrimitiveArray { - fn compute_statistics(&self, stat: Stat) -> VortexResult { +impl StatisticsVTable for PrimitiveEncoding { + fn compute_statistics(&self, array: &PrimitiveArray, stat: Stat) -> VortexResult { if stat == Stat::UncompressedSizeInBytes { - return Ok(StatsSet::of(stat, self.nbytes())); + return Ok(StatsSet::of(stat, array.nbytes())); } - let mut stats = match_each_native_ptype!(self.ptype(), |$P| { - match self.logical_validity() { - LogicalValidity::AllValid(_) => self.maybe_null_slice::<$P>().compute_statistics(stat), - LogicalValidity::AllInvalid(v) => Ok(StatsSet::nulls(v, self.dtype())), - LogicalValidity::Array(a) => NullableValues( - self.maybe_null_slice::<$P>(), - &a.clone().into_bool()?.boolean_buffer(), - ) - .compute_statistics(stat), + let mut stats = match_each_native_ptype!(array.ptype(), |$P| { + match array.logical_validity() { + LogicalValidity::AllValid(_) => self.compute_statistics(array.maybe_null_slice::<$P>(), stat), + LogicalValidity::AllInvalid(v) => Ok(StatsSet::nulls(v, array.dtype())), + LogicalValidity::Array(a) => self.compute_statistics( + &NullableValues( + array.maybe_null_slice::<$P>(), + &a.clone().into_bool()?.boolean_buffer(), + ), + stat + ), } })?; if let Some(min) = stats.get(Stat::Min) { - stats.set(Stat::Min, min.cast(self.dtype())?); + stats.set(Stat::Min, min.cast(array.dtype())?); } if let Some(max) = stats.get(Stat::Max) { - stats.set(Stat::Max, max.cast(self.dtype())?); + stats.set(Stat::Max, max.cast(array.dtype())?); } Ok(stats) } } -impl ArrayStatisticsCompute for &[T] { - fn compute_statistics(&self, stat: Stat) -> VortexResult { - if self.is_empty() { +impl StatisticsVTable<[T]> for PrimitiveEncoding { + fn compute_statistics(&self, array: &[T], stat: Stat) -> VortexResult { + if array.is_empty() { return Ok(StatsSet::default()); } Ok(match stat { Stat::Min | Stat::Max => { - let mut stats = compute_min_max(self.iter().copied(), true); + let mut stats = compute_min_max(array.iter().copied(), true); stats.set( Stat::IsConstant, stats @@ -68,17 +71,17 @@ impl ArrayStatisticsCompute for &[T] { stats } Stat::IsConstant => { - let first = self[0]; - let is_constant = self.iter().all(|x| first.is_eq(*x)); + let first = array[0]; + let is_constant = array.iter().all(|x| first.is_eq(*x)); StatsSet::from_iter([(Stat::IsConstant, is_constant.into())]) } Stat::NullCount => StatsSet::from_iter([(Stat::NullCount, 0u64.into())]), - Stat::IsSorted => compute_is_sorted(self.iter().copied()), - Stat::IsStrictSorted => compute_is_strict_sorted(self.iter().copied()), - Stat::RunCount => compute_run_count(self.iter().copied()), + Stat::IsSorted => compute_is_sorted(array.iter().copied()), + Stat::IsStrictSorted => compute_is_strict_sorted(array.iter().copied()), + Stat::RunCount => compute_run_count(array.iter().copied()), Stat::BitWidthFreq | Stat::TrailingZeroFreq => { - let mut stats = BitWidthAccumulator::new(self[0]); - self.iter().skip(1).for_each(|next| stats.next(*next)); + let mut stats = BitWidthAccumulator::new(array[0]); + array.iter().skip(1).for_each(|next| stats.next(*next)); stats.finish() } Stat::TrueCount | Stat::UncompressedSizeInBytes => StatsSet::default(), @@ -88,17 +91,21 @@ impl ArrayStatisticsCompute for &[T] { struct NullableValues<'a, T: PStatsType>(&'a [T], &'a BooleanBuffer); -impl ArrayStatisticsCompute for NullableValues<'_, T> { - fn compute_statistics(&self, stat: Stat) -> VortexResult { - let values = self.0; +impl StatisticsVTable> for PrimitiveEncoding { + fn compute_statistics( + &self, + nulls: &NullableValues<'_, T>, + stat: Stat, + ) -> VortexResult { + let values = nulls.0; if values.is_empty() || stat == Stat::TrueCount || stat == Stat::UncompressedSizeInBytes { return Ok(StatsSet::default()); } - let null_count = values.len() - self.1.count_set_bits(); + let null_count = values.len() - nulls.1.count_set_bits(); if null_count == 0 { // no nulls, use the fast path on the values - return values.compute_statistics(stat); + return self.compute_statistics(values, stat); } else if null_count == values.len() { // all nulls! return Ok(StatsSet::nulls( @@ -116,7 +123,7 @@ impl ArrayStatisticsCompute for NullableValues<'_, T> { return Ok(stats); } - let mut set_indices = self.1.set_indices(); + let mut set_indices = nulls.1.set_indices(); if matches!(stat, Stat::Min | Stat::Max) { stats.extend(compute_min_max(set_indices.map(|next| values[next]), false)); } else if stat == Stat::IsSorted { diff --git a/vortex-array/src/array/sparse/mod.rs b/vortex-array/src/array/sparse/mod.rs index 1621baa329..3789ac0019 100644 --- a/vortex-array/src/array/sparse/mod.rs +++ b/vortex-array/src/array/sparse/mod.rs @@ -10,7 +10,7 @@ use crate::array::visitor::{AcceptArrayVisitor, ArrayVisitor}; use crate::compute::unary::scalar_at; use crate::compute::{search_sorted, SearchResult, SearchSortedSide}; use crate::encoding::ids; -use crate::stats::{ArrayStatistics, ArrayStatisticsCompute, Stat, StatsSet}; +use crate::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet}; use crate::validity::{ArrayValidity, LogicalValidity}; use crate::variants::PrimitiveArrayTrait; use crate::{ @@ -192,21 +192,21 @@ impl AcceptArrayVisitor for SparseArray { } } -impl ArrayStatisticsCompute for SparseArray { - fn compute_statistics(&self, stat: Stat) -> VortexResult { - let mut stats = self.values().statistics().compute_all(&[stat])?; - if self.len() == self.values().len() { +impl StatisticsVTable for SparseEncoding { + fn compute_statistics(&self, array: &SparseArray, stat: Stat) -> VortexResult { + let mut stats = array.values().statistics().compute_all(&[stat])?; + if array.len() == array.values().len() { return Ok(stats); } - let fill_len = self.len() - self.values().len(); - let fill_stats = if self.fill_value().is_null() { - StatsSet::nulls(fill_len, self.dtype()) + let fill_len = array.len() - array.values().len(); + let fill_stats = if array.fill_value().is_null() { + StatsSet::nulls(fill_len, array.dtype()) } else { - StatsSet::constant(self.fill_scalar(), fill_len) + StatsSet::constant(array.fill_scalar(), fill_len) }; - if self.values().is_empty() { + if array.values().is_empty() { return Ok(fill_stats); } diff --git a/vortex-array/src/array/struct_/mod.rs b/vortex-array/src/array/struct_/mod.rs index 34c59f2c2d..818b13a5f9 100644 --- a/vortex-array/src/array/struct_/mod.rs +++ b/vortex-array/src/array/struct_/mod.rs @@ -7,7 +7,7 @@ use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexExpect as _, Vor use crate::array::visitor::{AcceptArrayVisitor, ArrayVisitor}; use crate::encoding::ids; -use crate::stats::{ArrayStatistics, ArrayStatisticsCompute, Stat, StatsSet}; +use crate::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet}; use crate::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata}; use crate::variants::{ArrayVariants, StructArrayTrait}; use crate::{ @@ -192,17 +192,17 @@ impl AcceptArrayVisitor for StructArray { } } -impl ArrayStatisticsCompute for StructArray { - fn compute_statistics(&self, stat: Stat) -> VortexResult { +impl StatisticsVTable for StructEncoding { + fn compute_statistics(&self, array: &StructArray, stat: Stat) -> VortexResult { Ok(match stat { - Stat::UncompressedSizeInBytes => self + Stat::UncompressedSizeInBytes => array .children() .map(|f| f.statistics().compute_uncompressed_size_in_bytes()) .reduce(|acc, field_size| acc.zip(field_size).map(|(a, b)| a + b)) .flatten() .map(|size| StatsSet::of(stat, size)) .unwrap_or_default(), - Stat::NullCount => StatsSet::of(stat, self.validity().null_count(self.len())?), + Stat::NullCount => StatsSet::of(stat, array.validity().null_count(array.len())?), _ => StatsSet::default(), }) } diff --git a/vortex-array/src/array/varbin/stats.rs b/vortex-array/src/array/varbin/stats.rs index 4136c4151f..df530238cc 100644 --- a/vortex-array/src/array/varbin/stats.rs +++ b/vortex-array/src/array/varbin/stats.rs @@ -6,19 +6,20 @@ use vortex_error::VortexResult; use crate::accessor::ArrayAccessor; use crate::array::varbin::{varbin_scalar, VarBinArray}; -use crate::stats::{ArrayStatisticsCompute, Stat, StatsSet}; +use crate::array::VarBinEncoding; +use crate::stats::{Stat, StatisticsVTable, StatsSet}; use crate::{ArrayDType, ArrayLen, ArrayTrait as _}; -impl ArrayStatisticsCompute for VarBinArray { - fn compute_statistics(&self, stat: Stat) -> VortexResult { +impl StatisticsVTable for VarBinEncoding { + fn compute_statistics(&self, array: &VarBinArray, stat: Stat) -> VortexResult { if stat == Stat::UncompressedSizeInBytes { - return Ok(StatsSet::of(stat, self.nbytes())); + return Ok(StatsSet::of(stat, array.nbytes())); } - if self.is_empty() { + if array.is_empty() { return Ok(StatsSet::default()); } - self.with_iterator(|iter| compute_stats(iter, self.dtype())) + array.with_iterator(|iter| compute_stats(iter, array.dtype())) } } diff --git a/vortex-array/src/array/varbinview/stats.rs b/vortex-array/src/array/varbinview/stats.rs index b81d192bce..87f1343260 100644 --- a/vortex-array/src/array/varbinview/stats.rs +++ b/vortex-array/src/array/varbinview/stats.rs @@ -3,19 +3,20 @@ use vortex_error::VortexResult; use crate::accessor::ArrayAccessor; use crate::array::varbin::compute_stats; use crate::array::varbinview::VarBinViewArray; -use crate::stats::{ArrayStatisticsCompute, Stat, StatsSet}; +use crate::array::VarBinViewEncoding; +use crate::stats::{Stat, StatisticsVTable, StatsSet}; use crate::{ArrayDType, ArrayLen, ArrayTrait as _}; -impl ArrayStatisticsCompute for VarBinViewArray { - fn compute_statistics(&self, stat: Stat) -> VortexResult { +impl StatisticsVTable for VarBinViewEncoding { + fn compute_statistics(&self, array: &VarBinViewArray, stat: Stat) -> VortexResult { if stat == Stat::UncompressedSizeInBytes { - return Ok(StatsSet::of(stat, self.nbytes())); + return Ok(StatsSet::of(stat, array.nbytes())); } - if self.is_empty() { + if array.is_empty() { return Ok(StatsSet::default()); } - self.with_iterator(|iter| compute_stats(iter, self.dtype())) + array.with_iterator(|iter| compute_stats(iter, array.dtype())) } } diff --git a/vortex-array/src/data/owned.rs b/vortex-array/src/data/owned.rs index 46a22fd6f2..db440fcc81 100644 --- a/vortex-array/src/data/owned.rs +++ b/vortex-array/src/data/owned.rs @@ -135,8 +135,9 @@ impl Statistics for OwnedArrayData { return Some(s); } - let computed = ArrayData::from(self.clone()) - .with_dyn(|a| a.compute_statistics(stat)) + let computed = self + .encoding() + .compute_statistics(&ArrayData::from(self.clone()), stat) .ok()?; self.stats_set diff --git a/vortex-array/src/data/viewed.rs b/vortex-array/src/data/viewed.rs index 45973c59b3..0293c40fe8 100644 --- a/vortex-array/src/data/viewed.rs +++ b/vortex-array/src/data/viewed.rs @@ -223,8 +223,8 @@ impl Statistics for ViewedArrayData { return Some(s); } - ArrayData::from(self.clone()) - .with_dyn(|a| a.compute_statistics(stat)) + self.encoding() + .compute_statistics(&ArrayData::from(self.clone()), stat) .ok()? .get(stat) .cloned() diff --git a/vortex-array/src/encoding/mod.rs b/vortex-array/src/encoding/mod.rs index 8dd31b2b0d..7482304472 100644 --- a/vortex-array/src/encoding/mod.rs +++ b/vortex-array/src/encoding/mod.rs @@ -7,6 +7,7 @@ use std::hash::{Hash, Hasher}; use vortex_error::{vortex_panic, VortexResult}; use crate::compute::ComputeVTable; +use crate::stats::StatisticsVTable; use crate::{ArrayData, ArrayDef, ArrayMetadata, ArrayTrait, IntoCanonicalVTable, MetadataVTable}; pub mod opaque; @@ -67,7 +68,14 @@ pub type EncodingRef = &'static dyn EncodingVTable; /// Object-safe encoding trait for an array. pub trait EncodingVTable: - 'static + Sync + Send + Debug + IntoCanonicalVTable + MetadataVTable + ComputeVTable + 'static + + Sync + + Send + + Debug + + IntoCanonicalVTable + + MetadataVTable + + ComputeVTable + + StatisticsVTable { fn id(&self) -> EncodingId; diff --git a/vortex-array/src/encoding/opaque.rs b/vortex-array/src/encoding/opaque.rs index 6f2e46ef9d..e1baf36f7d 100644 --- a/vortex-array/src/encoding/opaque.rs +++ b/vortex-array/src/encoding/opaque.rs @@ -6,6 +6,7 @@ use vortex_error::{vortex_bail, VortexResult}; use crate::compute::ComputeVTable; use crate::encoding::{EncodingId, EncodingVTable}; +use crate::stats::StatisticsVTable; use crate::{ ArrayData, ArrayMetadata, ArrayTrait, Canonical, IntoCanonicalVTable, MetadataVTable, TrySerializeArrayMetadata, @@ -62,6 +63,8 @@ impl MetadataVTable for OpaqueEncoding { } } +impl StatisticsVTable for OpaqueEncoding {} + #[derive(Debug)] pub struct OpaqueMetadata; diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index 9304323f5d..7728f1d8b4 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -24,7 +24,7 @@ use vortex_error::{VortexExpect, VortexResult}; use crate::array::visitor::{AcceptArrayVisitor, ArrayVisitor}; use crate::compute::ArrayCompute; use crate::encoding::ArrayEncodingRef; -use crate::stats::{ArrayStatistics, ArrayStatisticsCompute}; +use crate::stats::ArrayStatistics; use crate::validity::ArrayValidity; use crate::variants::ArrayVariants; @@ -98,7 +98,6 @@ pub trait ArrayTrait: + ArrayValidity + AcceptArrayVisitor + ArrayStatistics - + ArrayStatisticsCompute { /// Total size of the array in bytes, including all children and buffers. fn nbytes(&self) -> usize { diff --git a/vortex-array/src/stats/mod.rs b/vortex-array/src/stats/mod.rs index 74b9a80e7e..20e1731950 100644 --- a/vortex-array/src/stats/mod.rs +++ b/vortex-array/src/stats/mod.rs @@ -9,9 +9,10 @@ use itertools::Itertools; pub use statsset::*; use vortex_dtype::Nullability::NonNullable; use vortex_dtype::{DType, NativePType}; -use vortex_error::{vortex_panic, VortexError, VortexResult}; +use vortex_error::{vortex_err, vortex_panic, VortexError, VortexResult}; use vortex_scalar::Scalar; +use crate::encoding::Encoding; use crate::ArrayData; pub mod flatbuffers; @@ -126,13 +127,30 @@ pub trait ArrayStatistics { fn inherit_statistics(&self, parent: &dyn Statistics); } -pub trait ArrayStatisticsCompute { +/// Encoding VTable for computing array statistics. +pub trait StatisticsVTable { /// Compute the requested statistic. Can return additional stats. - fn compute_statistics(&self, _stat: Stat) -> VortexResult { + fn compute_statistics(&self, _array: &Array, _stat: Stat) -> VortexResult { Ok(StatsSet::default()) } } +impl StatisticsVTable for E +where + E: StatisticsVTable, + for<'a> &'a E::Array: TryFrom<&'a ArrayData, Error = VortexError>, +{ + fn compute_statistics(&self, array: &ArrayData, stat: Stat) -> VortexResult { + let array_ref = <&E::Array>::try_from(array)?; + let encoding = array + .encoding() + .as_any() + .downcast_ref::() + .ok_or_else(|| vortex_err!("Mismatched encoding"))?; + StatisticsVTable::compute_statistics(encoding, array_ref, stat) + } +} + impl dyn Statistics + '_ { pub fn get_as TryFrom<&'a Scalar, Error = VortexError>>( &self,