Skip to content

Commit

Permalink
Add TakeOptions to skip bounds checking (#1343)
Browse files Browse the repository at this point in the history
For Q6 and Q19 this seems to help by ~15%. For the others, it's reported
as an increase, but I'm not convinced this isn't just a slow benchmark
run since there _shouldn't_ be any reason for an increase.
  • Loading branch information
gatesn authored Nov 17, 2024
1 parent eee490e commit 508db10
Show file tree
Hide file tree
Showing 40 changed files with 565 additions and 200 deletions.
11 changes: 7 additions & 4 deletions encodings/alp/src/alp/compute.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use vortex_array::array::ConstantArray;
use vortex_array::compute::unary::{scalar_at, scalar_at_unchecked, ScalarAtFn};
use vortex_array::compute::{
compare, filter, slice, take, ArrayCompute, FilterFn, MaybeCompareFn, Operator, SliceFn, TakeFn,
compare, filter, slice, take, ArrayCompute, FilterFn, MaybeCompareFn, Operator, SliceFn,
TakeFn, TakeOptions,
};
use vortex_array::stats::{ArrayStatistics, Stat};
use vortex_array::variants::PrimitiveArrayTrait;
Expand Down Expand Up @@ -60,12 +61,14 @@ impl ScalarAtFn for ALPArray {
}

impl TakeFn for ALPArray {
fn take(&self, indices: &ArrayData) -> VortexResult<ArrayData> {
fn take(&self, indices: &ArrayData, options: TakeOptions) -> VortexResult<ArrayData> {
// TODO(ngates): wrap up indices in an array that caches decompression?
Ok(Self::try_new(
take(self.encoded(), indices)?,
take(self.encoded(), indices, options)?,
self.exponents(),
self.patches().map(|p| take(&p, indices)).transpose()?,
self.patches()
.map(|p| take(&p, indices, options))
.transpose()?,
)?
.into_array())
}
Expand Down
24 changes: 14 additions & 10 deletions encodings/alp/src/alp_rd/compute/take.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
use vortex_array::compute::{take, TakeFn};
use vortex_array::compute::{take, TakeFn, TakeOptions};
use vortex_array::{ArrayDType, ArrayData, IntoArrayData};
use vortex_error::VortexResult;

use crate::ALPRDArray;

impl TakeFn for ALPRDArray {
fn take(&self, indices: &ArrayData) -> VortexResult<ArrayData> {
fn take(&self, indices: &ArrayData, options: TakeOptions) -> VortexResult<ArrayData> {
let left_parts_exceptions = self
.left_parts_exceptions()
.map(|array| take(&array, indices))
.map(|array| take(&array, indices, options))
.transpose()?;

Ok(ALPRDArray::try_new(
self.dtype().clone(),
take(self.left_parts(), indices)?,
take(self.left_parts(), indices, options)?,
self.left_parts_dict(),
take(self.right_parts(), indices)?,
take(self.right_parts(), indices, options)?,
self.right_bit_width(),
left_parts_exceptions,
)?
Expand All @@ -27,7 +27,7 @@ impl TakeFn for ALPRDArray {
mod test {
use rstest::rstest;
use vortex_array::array::PrimitiveArray;
use vortex_array::compute::take;
use vortex_array::compute::{take, TakeOptions};
use vortex_array::IntoArrayVariant;

use crate::{ALPRDFloat, RDEncoder};
Expand All @@ -41,10 +41,14 @@ mod test {

assert!(encoded.left_parts_exceptions().is_some());

let taken = take(encoded.as_ref(), PrimitiveArray::from(vec![0, 2]).as_ref())
.unwrap()
.into_primitive()
.unwrap();
let taken = take(
encoded.as_ref(),
PrimitiveArray::from(vec![0, 2]).as_ref(),
TakeOptions::default(),
)
.unwrap()
.into_primitive()
.unwrap();

assert_eq!(taken.maybe_null_slice::<T>(), &[a, outlier]);
}
Expand Down
4 changes: 2 additions & 2 deletions encodings/bytebool/src/compute.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use num_traits::AsPrimitive;
use vortex_array::compute::unary::{FillForwardFn, ScalarAtFn};
use vortex_array::compute::{ArrayCompute, SliceFn, TakeFn};
use vortex_array::compute::{ArrayCompute, SliceFn, TakeFn, TakeOptions};
use vortex_array::validity::{ArrayValidity, Validity};
use vortex_array::variants::PrimitiveArrayTrait;
use vortex_array::{ArrayDType, ArrayData, IntoArrayData};
Expand Down Expand Up @@ -49,7 +49,7 @@ impl SliceFn for ByteBoolArray {
}

impl TakeFn for ByteBoolArray {
fn take(&self, indices: &ArrayData) -> VortexResult<ArrayData> {
fn take(&self, indices: &ArrayData, _options: TakeOptions) -> VortexResult<ArrayData> {
let validity = self.validity();
let indices = indices.clone().as_primitive();
let bools = self.maybe_null_slice();
Expand Down
10 changes: 5 additions & 5 deletions encodings/datetime-parts/src/compute.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use itertools::Itertools as _;
use vortex_array::array::{PrimitiveArray, TemporalArray};
use vortex_array::compute::unary::{scalar_at, ScalarAtFn};
use vortex_array::compute::{slice, take, ArrayCompute, SliceFn, TakeFn};
use vortex_array::compute::{slice, take, ArrayCompute, SliceFn, TakeFn, TakeOptions};
use vortex_array::validity::ArrayValidity;
use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant};
use vortex_datetime_dtype::{TemporalMetadata, TimeUnit};
Expand All @@ -26,12 +26,12 @@ impl ArrayCompute for DateTimePartsArray {
}

impl TakeFn for DateTimePartsArray {
fn take(&self, indices: &ArrayData) -> VortexResult<ArrayData> {
fn take(&self, indices: &ArrayData, options: TakeOptions) -> VortexResult<ArrayData> {
Ok(Self::try_new(
self.dtype().clone(),
take(self.days(), indices)?,
take(self.seconds(), indices)?,
take(self.subsecond(), indices)?,
take(self.days(), indices, options)?,
take(self.seconds(), indices, options)?,
take(self.subsecond(), indices, options)?,
)?
.into_array())
}
Expand Down
6 changes: 3 additions & 3 deletions encodings/dict/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ use arrow_buffer::BooleanBuffer;
use serde::{Deserialize, Serialize};
use vortex_array::array::visitor::{AcceptArrayVisitor, ArrayVisitor};
use vortex_array::array::BoolArray;
use vortex_array::compute::take;
use vortex_array::compute::unary::scalar_at;
use vortex_array::compute::{take, TakeOptions};
use vortex_array::encoding::ids;
use vortex_array::stats::StatsSet;
use vortex_array::validity::{ArrayValidity, LogicalValidity};
Expand Down Expand Up @@ -75,10 +75,10 @@ impl IntoCanonical for DictArray {
// copies of the view pointers.
DType::Utf8(_) | DType::Binary(_) => {
let canonical_values: ArrayData = self.values().into_canonical()?.into();
take(canonical_values, self.codes())?.into_canonical()
take(canonical_values, self.codes(), TakeOptions::default())?.into_canonical()
}
// Non-string case: take and then canonicalize
_ => take(self.values(), self.codes())?.into_canonical(),
_ => take(self.values(), self.codes(), TakeOptions::default())?.into_canonical(),
}
}
}
Expand Down
7 changes: 4 additions & 3 deletions encodings/dict/src/compute.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use vortex_array::compute::unary::{scalar_at, scalar_at_unchecked, ScalarAtFn};
use vortex_array::compute::{
compare, filter, slice, take, ArrayCompute, FilterFn, MaybeCompareFn, Operator, SliceFn, TakeFn,
compare, filter, slice, take, ArrayCompute, FilterFn, MaybeCompareFn, Operator, SliceFn,
TakeFn, TakeOptions,
};
use vortex_array::stats::{ArrayStatistics, Stat};
use vortex_array::{ArrayData, IntoArrayData};
Expand Down Expand Up @@ -75,11 +76,11 @@ impl ScalarAtFn for DictArray {
}

impl TakeFn for DictArray {
fn take(&self, indices: &ArrayData) -> VortexResult<ArrayData> {
fn take(&self, indices: &ArrayData, options: TakeOptions) -> VortexResult<ArrayData> {
// Dict
// codes: 0 0 1
// dict: a b c d e f g h
let codes = take(self.codes(), indices)?;
let codes = take(self.codes(), indices, options)?;
Self::try_new(codes, self.values()).map(|a| a.into_array())
}
}
Expand Down
23 changes: 21 additions & 2 deletions encodings/dict/src/variants.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
use vortex_array::variants::{
ArrayVariants, BinaryArrayTrait, PrimitiveArrayTrait, Utf8ArrayTrait,
ArrayVariants, BinaryArrayTrait, BoolArrayTrait, PrimitiveArrayTrait, Utf8ArrayTrait,
};
use vortex_array::ArrayDType;
use vortex_array::{ArrayDType, ArrayData};
use vortex_dtype::DType;
use vortex_error::VortexResult;

use crate::DictArray;

impl ArrayVariants for DictArray {
fn as_bool_array(&self) -> Option<&dyn BoolArrayTrait> {
matches!(self.dtype(), DType::Bool(..)).then_some(self)
}

fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> {
matches!(self.dtype(), DType::Primitive(..)).then_some(self)
}
Expand All @@ -20,6 +25,20 @@ impl ArrayVariants for DictArray {
}
}

impl BoolArrayTrait for DictArray {
fn invert(&self) -> VortexResult<ArrayData> {
todo!()
}

fn maybe_null_indices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = usize> + 'a> {
todo!()
}

fn maybe_null_slices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = (usize, usize)> + 'a> {
todo!()
}
}

impl PrimitiveArrayTrait for DictArray {}

impl Utf8ArrayTrait for DictArray {}
Expand Down
Loading

0 comments on commit 508db10

Please sign in to comment.