Skip to content

Commit

Permalink
StatsCompute VTable (#1434)
Browse files Browse the repository at this point in the history
Moves stats compute into a VTable on the encoding.
  • Loading branch information
gatesn authored Nov 21, 2024
1 parent 99659bc commit 30e8a21
Show file tree
Hide file tree
Showing 30 changed files with 240 additions and 193 deletions.
4 changes: 2 additions & 2 deletions encodings/alp/src/alp/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor};
use vortex_array::array::PrimitiveArray;
use vortex_array::encoding::ids;
use vortex_array::iter::{Accessor, AccessorRef};
use vortex_array::stats::ArrayStatisticsCompute;
use vortex_array::stats::StatisticsVTable;
use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity};
use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait};
use vortex_array::{
Expand Down Expand Up @@ -278,4 +278,4 @@ impl AcceptArrayVisitor for ALPArray {
}
}

impl ArrayStatisticsCompute for ALPArray {}
impl StatisticsVTable<ALPArray> for ALPEncoding {}
4 changes: 2 additions & 2 deletions encodings/alp/src/alp_rd/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize};
use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor};
use vortex_array::array::{PrimitiveArray, SparseArray};
use vortex_array::encoding::ids;
use vortex_array::stats::{ArrayStatisticsCompute, StatsSet};
use vortex_array::stats::{StatisticsVTable, StatsSet};
use vortex_array::validity::{ArrayValidity, LogicalValidity};
use vortex_array::{
impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical,
Expand Down Expand Up @@ -259,7 +259,7 @@ impl AcceptArrayVisitor for ALPRDArray {
}
}

impl ArrayStatisticsCompute for ALPRDArray {}
impl StatisticsVTable<ALPRDArray> for ALPRDEncoding {}

impl ArrayTrait for ALPRDArray {}

Expand Down
20 changes: 13 additions & 7 deletions encodings/bytebool/src/stats.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
use vortex_array::stats::{ArrayStatisticsCompute, Stat, StatsSet};
use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet};
use vortex_array::{ArrayLen, IntoArrayVariant};
use vortex_error::VortexResult;

use super::ByteBoolArray;
use super::{ByteBoolArray, ByteBoolEncoding};

impl ArrayStatisticsCompute for ByteBoolArray {
fn compute_statistics(&self, stat: Stat) -> VortexResult<StatsSet> {
if self.is_empty() {
impl StatisticsVTable<ByteBoolArray> for ByteBoolEncoding {
fn compute_statistics(&self, array: &ByteBoolArray, stat: Stat) -> VortexResult<StatsSet> {
if array.is_empty() {
return Ok(StatsSet::default());
}

// TODO(adamgs): This is slightly wasteful and could be optimized in the future
let bools = self.as_ref().clone().into_bool()?;
bools.compute_statistics(stat)
let bools = array.as_ref().clone().into_bool()?;
Ok(StatsSet::from_iter(
bools
.statistics()
.compute(stat)
.into_iter()
.map(|value| (stat, value)),
))
}
}

Expand Down
8 changes: 4 additions & 4 deletions encodings/datetime-parts/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor};
use vortex_array::array::StructArray;
use vortex_array::compute::unary::try_cast;
use vortex_array::encoding::ids;
use vortex_array::stats::{ArrayStatisticsCompute, Stat, StatsSet};
use vortex_array::stats::{Stat, StatisticsVTable, StatsSet};
use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity};
use vortex_array::variants::{ArrayVariants, ExtensionArrayTrait};
use vortex_array::{
Expand Down Expand Up @@ -162,10 +162,10 @@ impl AcceptArrayVisitor for DateTimePartsArray {
}
}

impl ArrayStatisticsCompute for DateTimePartsArray {
fn compute_statistics(&self, stat: Stat) -> VortexResult<StatsSet> {
impl StatisticsVTable<DateTimePartsArray> for DateTimePartsEncoding {
fn compute_statistics(&self, array: &DateTimePartsArray, stat: Stat) -> VortexResult<StatsSet> {
let maybe_stat = match stat {
Stat::NullCount => Some(Scalar::from(self.validity().null_count(self.len())?)),
Stat::NullCount => Some(Scalar::from(array.validity().null_count(array.len())?)),
_ => None,
};

Expand Down
24 changes: 12 additions & 12 deletions encodings/dict/src/stats.rs
Original file line number Diff line number Diff line change
@@ -1,54 +1,54 @@
use vortex_array::stats::{ArrayStatistics, ArrayStatisticsCompute, Stat, StatsSet};
use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet};
use vortex_error::VortexResult;

use crate::DictArray;
use crate::{DictArray, DictEncoding};

impl ArrayStatisticsCompute for DictArray {
fn compute_statistics(&self, stat: Stat) -> VortexResult<StatsSet> {
impl StatisticsVTable<DictArray> for DictEncoding {
fn compute_statistics(&self, array: &DictArray, stat: Stat) -> VortexResult<StatsSet> {
let mut stats = StatsSet::default();

match stat {
Stat::RunCount => {
if let Some(rc) = self.codes().statistics().compute(Stat::RunCount) {
if let Some(rc) = array.codes().statistics().compute(Stat::RunCount) {
stats.set(Stat::RunCount, rc);
}
}
Stat::Min => {
if let Some(min) = self.values().statistics().compute(Stat::Min) {
if let Some(min) = array.values().statistics().compute(Stat::Min) {
stats.set(Stat::Min, min);
}
}
Stat::Max => {
if let Some(max) = self.values().statistics().compute(Stat::Max) {
if let Some(max) = array.values().statistics().compute(Stat::Max) {
stats.set(Stat::Max, max);
}
}
Stat::IsConstant => {
if let Some(is_constant) = self.codes().statistics().compute(Stat::IsConstant) {
if let Some(is_constant) = array.codes().statistics().compute(Stat::IsConstant) {
stats.set(Stat::IsConstant, is_constant);
}
}
Stat::NullCount => {
if let Some(null_count) = self.codes().statistics().compute(Stat::NullCount) {
if let Some(null_count) = array.codes().statistics().compute(Stat::NullCount) {
stats.set(Stat::NullCount, null_count);
}
}
Stat::IsSorted | Stat::IsStrictSorted => {
// if dictionary is sorted
if self
if array
.values()
.statistics()
.compute_is_sorted()
.unwrap_or(false)
{
if let Some(codes_are_sorted) =
self.codes().statistics().compute(Stat::IsSorted)
array.codes().statistics().compute(Stat::IsSorted)
{
stats.set(Stat::IsSorted, codes_are_sorted);
}

if let Some(codes_are_strict_sorted) =
self.codes().statistics().compute(Stat::IsStrictSorted)
array.codes().statistics().compute(Stat::IsStrictSorted)
{
stats.set(Stat::IsStrictSorted, codes_are_strict_sorted);
}
Expand Down
4 changes: 2 additions & 2 deletions encodings/fastlanes/src/bitpacking/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use fastlanes::BitPacking;
use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor};
use vortex_array::array::{PrimitiveArray, SparseArray};
use vortex_array::encoding::ids;
use vortex_array::stats::{ArrayStatisticsCompute, StatsSet};
use vortex_array::stats::{StatisticsVTable, StatsSet};
use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata};
use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait};
use vortex_array::{
Expand Down Expand Up @@ -228,7 +228,7 @@ impl AcceptArrayVisitor for BitPackedArray {
}
}

impl ArrayStatisticsCompute for BitPackedArray {}
impl StatisticsVTable<BitPackedArray> for BitPackedEncoding {}

impl ArrayTrait for BitPackedArray {
fn nbytes(&self) -> usize {
Expand Down
4 changes: 2 additions & 2 deletions encodings/fastlanes/src/delta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize};
use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor};
use vortex_array::array::PrimitiveArray;
use vortex_array::encoding::ids;
use vortex_array::stats::{ArrayStatisticsCompute, StatsSet};
use vortex_array::stats::{StatisticsVTable, StatsSet};
use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata};
use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait};
use vortex_array::{
Expand Down Expand Up @@ -249,4 +249,4 @@ impl AcceptArrayVisitor for DeltaArray {
}
}

impl ArrayStatisticsCompute for DeltaArray {}
impl StatisticsVTable<DeltaArray> for DeltaEncoding {}
4 changes: 2 additions & 2 deletions encodings/fastlanes/src/for/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ pub use compress::*;
use serde::{Deserialize, Serialize};
use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor};
use vortex_array::encoding::ids;
use vortex_array::stats::{ArrayStatisticsCompute, StatsSet};
use vortex_array::stats::{StatisticsVTable, StatsSet};
use vortex_array::validity::{ArrayValidity, LogicalValidity};
use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait};
use vortex_array::{
Expand Down Expand Up @@ -105,7 +105,7 @@ impl AcceptArrayVisitor for FoRArray {
}
}

impl ArrayStatisticsCompute for FoRArray {}
impl StatisticsVTable<FoRArray> for FoREncoding {}

impl ArrayTrait for FoRArray {
fn nbytes(&self) -> usize {
Expand Down
4 changes: 2 additions & 2 deletions encodings/fsst/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize};
use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor};
use vortex_array::array::{VarBin, VarBinArray};
use vortex_array::encoding::ids;
use vortex_array::stats::{ArrayStatisticsCompute, StatsSet};
use vortex_array::stats::{StatisticsVTable, StatsSet};
use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity};
use vortex_array::variants::{ArrayVariants, BinaryArrayTrait, Utf8ArrayTrait};
use vortex_array::{
Expand Down Expand Up @@ -200,7 +200,7 @@ impl AcceptArrayVisitor for FSSTArray {
}
}

impl ArrayStatisticsCompute for FSSTArray {}
impl StatisticsVTable<FSSTArray> for FSSTEncoding {}

impl ArrayValidity for FSSTArray {
fn is_valid(&self, index: usize) -> bool {
Expand Down
19 changes: 10 additions & 9 deletions encodings/roaring/src/boolean/stats.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
use vortex_array::stats::{ArrayStatisticsCompute, Stat, StatsSet};
use vortex_array::stats::{Stat, StatisticsVTable, StatsSet};
use vortex_array::ArrayLen;
use vortex_error::{vortex_err, VortexResult};

use crate::RoaringBoolArray;
use crate::{RoaringBoolArray, RoaringBoolEncoding};

impl ArrayStatisticsCompute for RoaringBoolArray {
fn compute_statistics(&self, stat: Stat) -> VortexResult<StatsSet> {
impl StatisticsVTable<RoaringBoolArray> for RoaringBoolEncoding {
fn compute_statistics(&self, array: &RoaringBoolArray, stat: Stat) -> VortexResult<StatsSet> {
// Only needs to compute IsSorted, IsStrictSorted and RunCount; all other stats have been populated on construction.
let bitmap = self.bitmap();
let bitmap = array.bitmap();
let true_count = bitmap.statistics().cardinality;
if matches!(
stat,
Expand All @@ -16,12 +16,12 @@ impl ArrayStatisticsCompute for RoaringBoolArray {
return Ok(StatsSet::bools_with_true_and_null_count(
true_count as usize,
0,
self.len(),
array.len(),
));
}

if matches!(stat, Stat::IsSorted | Stat::IsStrictSorted) {
let is_sorted = if true_count == 0 || true_count == self.len() as u64 {
let is_sorted = if true_count == 0 || true_count == array.len() as u64 {
true
} else {
let min_idx = bitmap.minimum().ok_or_else(|| {
Expand All @@ -30,11 +30,12 @@ impl ArrayStatisticsCompute for RoaringBoolArray {
let max_idx = bitmap.maximum().ok_or_else(|| {
vortex_err!("Bitmap has no maximum despite having cardinality > 0")
})?;
(max_idx as usize + 1 == self.len()) && (max_idx + 1 - min_idx) as u64 == true_count
(max_idx as usize + 1 == array.len())
&& (max_idx + 1 - min_idx) as u64 == true_count
};

let is_strict_sorted =
is_sorted && (self.len() <= 1 || (self.len() == 2 && true_count == 1));
is_sorted && (array.len() <= 1 || (array.len() == 2 && true_count == 1));
return Ok(StatsSet::from_iter([
(Stat::IsSorted, is_sorted.into()),
(Stat::IsStrictSorted, is_strict_sorted.into()),
Expand Down
8 changes: 4 additions & 4 deletions encodings/roaring/src/integer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor};
use vortex_array::array::PrimitiveArray;
use vortex_array::compute::unary::try_cast;
use vortex_array::encoding::ids;
use vortex_array::stats::{ArrayStatistics, ArrayStatisticsCompute, Stat, StatsSet};
use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet};
use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity};
use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait};
use vortex_array::{
Expand Down Expand Up @@ -137,12 +137,12 @@ impl AcceptArrayVisitor for RoaringIntArray {
}
}

impl ArrayStatisticsCompute for RoaringIntArray {
fn compute_statistics(&self, stat: Stat) -> VortexResult<StatsSet> {
impl StatisticsVTable<RoaringIntArray> for RoaringIntEncoding {
fn compute_statistics(&self, array: &RoaringIntArray, stat: Stat) -> VortexResult<StatsSet> {
// possibly faster to write an accumulator over the iterator, though not necessarily
if stat == Stat::TrailingZeroFreq || stat == Stat::BitWidthFreq || stat == Stat::RunCount {
let primitive =
PrimitiveArray::from_vec(self.owned_bitmap().to_vec(), Validity::NonNullable);
PrimitiveArray::from_vec(array.owned_bitmap().to_vec(), Validity::NonNullable);
primitive.statistics().compute_all(&[
Stat::TrailingZeroFreq,
Stat::BitWidthFreq,
Expand Down
21 changes: 10 additions & 11 deletions encodings/runend-bool/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use vortex_array::array::{BoolArray, PrimitiveArray};
use vortex_array::compute::unary::scalar_at;
use vortex_array::compute::{search_sorted, SearchSortedSide};
use vortex_array::encoding::ids;
use vortex_array::stats::{ArrayStatistics, ArrayStatisticsCompute, Stat, StatsSet};
use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet};
use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata};
use vortex_array::variants::{ArrayVariants, BoolArrayTrait, PrimitiveArrayTrait};
use vortex_array::{
Expand Down Expand Up @@ -229,17 +229,17 @@ impl AcceptArrayVisitor for RunEndBoolArray {
}
}

impl ArrayStatisticsCompute for RunEndBoolArray {
fn compute_statistics(&self, stat: Stat) -> VortexResult<StatsSet> {
impl StatisticsVTable<RunEndBoolArray> for RunEndBoolEncoding {
fn compute_statistics(&self, array: &RunEndBoolArray, stat: Stat) -> VortexResult<StatsSet> {
let maybe_scalar: Option<Scalar> = match stat {
Stat::NullCount => Some(self.validity().null_count(self.len())?.into()),
Stat::NullCount => Some(array.validity().null_count(array.len())?.into()),
Stat::TrueCount => {
let pends = self.ends().into_primitive()?;
let pends = array.ends().into_primitive()?;
let mut true_count: usize = 0;
let mut prev_end: usize = 0;
let mut include = self.start();
let mut include = array.start();
match_each_unsigned_integer_ptype!(pends.ptype(), |$P| {
for end in trimmed_ends_iter(pends.maybe_null_slice::<$P>(), self.offset(), self.len()) {
for end in trimmed_ends_iter(pends.maybe_null_slice::<$P>(), array.offset(), array.len()) {
if include {
true_count += end - prev_end;
}
Expand Down Expand Up @@ -268,7 +268,7 @@ mod test {
use vortex_array::array::{BoolArray, PrimitiveArray};
use vortex_array::compute::unary::scalar_at;
use vortex_array::compute::{slice, take, TakeOptions};
use vortex_array::stats::{ArrayStatistics as _, ArrayStatisticsCompute};
use vortex_array::stats::ArrayStatistics;
use vortex_array::validity::Validity;
use vortex_array::{
ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoCanonical, ToArrayData,
Expand Down Expand Up @@ -406,10 +406,9 @@ mod test {
Stat::IsStrictSorted,
] {
// call compute_statistics directly to avoid caching
let bools_stats = bools.compute_statistics(stat).unwrap();
let expected = bools_stats.get(stat).unwrap();
let expected = bools.statistics().compute(stat).unwrap();
let actual = arr.statistics().compute(stat).unwrap();
assert_eq!(expected, &actual);
assert_eq!(expected, actual);
}

assert_eq!(arr.statistics().compute_run_count(), Some(ends_len));
Expand Down
15 changes: 8 additions & 7 deletions encodings/runend/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use vortex_array::array::PrimitiveArray;
use vortex_array::compute::unary::scalar_at;
use vortex_array::compute::{search_sorted, search_sorted_usize_many, SearchSortedSide};
use vortex_array::encoding::ids;
use vortex_array::stats::{ArrayStatistics, ArrayStatisticsCompute, Stat, StatsSet};
use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet};
use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata};
use vortex_array::variants::{ArrayVariants, BoolArrayTrait, PrimitiveArrayTrait};
use vortex_array::{
Expand Down Expand Up @@ -248,17 +248,18 @@ impl AcceptArrayVisitor for RunEndArray {
}
}

impl ArrayStatisticsCompute for RunEndArray {
fn compute_statistics(&self, stat: Stat) -> VortexResult<StatsSet> {
impl StatisticsVTable<RunEndArray> for RunEndEncoding {
fn compute_statistics(&self, array: &RunEndArray, stat: Stat) -> VortexResult<StatsSet> {
let maybe_stat = match stat {
Stat::Min | Stat::Max => self.values().statistics().compute(stat),
Stat::NullCount => Some(Scalar::from(self.validity().null_count(self.len())?)),
Stat::Min | Stat::Max => array.values().statistics().compute(stat),
Stat::NullCount => Some(Scalar::from(array.validity().null_count(array.len())?)),
Stat::IsSorted => Some(Scalar::from(
self.values()
array
.values()
.statistics()
.compute_is_sorted()
.unwrap_or(false)
&& self.logical_validity().all_valid(),
&& array.logical_validity().all_valid(),
)),
_ => None,
};
Expand Down
Loading

0 comments on commit 30e8a21

Please sign in to comment.