From 021cbe8b8ccc84ac15ffaf2769663fb98aacfde9 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 4 Dec 2024 18:24:54 -0500 Subject: [PATCH] Add `maybe_from` function to help downcast ArrayData into a specific encoded array without potentially capturing a backtrace (#1560) closes #1377 --- bench-vortex/benches/compress.rs | 2 +- encodings/alp/src/alp/array.rs | 2 +- .../fastlanes/src/bitpacking/compute/search_sorted.rs | 2 +- encodings/fastlanes/src/bitpacking/compute/slice.rs | 2 +- vortex-array/src/array/extension/compute/compare.rs | 4 +--- vortex-array/src/macros.rs | 8 ++++++++ vortex-ipc/src/stream_writer/mod.rs | 8 +++----- vortex-sampling-compressor/src/compressors/alp.rs | 2 +- vortex-sampling-compressor/src/compressors/alp_rd.rs | 2 +- vortex-sampling-compressor/src/compressors/bitpacked.rs | 2 +- vortex-sampling-compressor/src/compressors/delta.rs | 2 +- vortex-sampling-compressor/src/compressors/dict.rs | 9 +++------ vortex-sampling-compressor/src/compressors/for.rs | 2 +- vortex-sampling-compressor/src/compressors/zigzag.rs | 2 +- 14 files changed, 25 insertions(+), 24 deletions(-) diff --git a/bench-vortex/benches/compress.rs b/bench-vortex/benches/compress.rs index 088cf3056e..2cfd12d7df 100644 --- a/bench-vortex/benches/compress.rs +++ b/bench-vortex/benches/compress.rs @@ -104,7 +104,7 @@ fn parquet_decompress_read(buf: bytes::Bytes) -> usize { } fn parquet_compressed_written_size(array: &ArrayData, compression: Compression) -> usize { - let chunked = ChunkedArray::try_from(array.clone()).unwrap(); + let chunked = ChunkedArray::maybe_from(array.clone()).unwrap(); let (batches, schema) = chunked_to_vec_record_batch(chunked); parquet_compress_write(batches, schema, compression, &mut Vec::new()) } diff --git a/encodings/alp/src/alp/array.rs b/encodings/alp/src/alp/array.rs index 91e1f67d3c..eae2d6e3b3 100644 --- a/encodings/alp/src/alp/array.rs +++ b/encodings/alp/src/alp/array.rs @@ -77,7 +77,7 @@ impl ALPArray { } pub fn encode(array: ArrayData) -> VortexResult { - if let Ok(parray) = PrimitiveArray::try_from(array) { + if let Some(parray) = PrimitiveArray::maybe_from(array) { Ok(alp_encode(&parray)?.into_array()) } else { vortex_bail!("ALP can only encode primitive arrays"); diff --git a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs index a7cfe4059c..f8b85b004e 100644 --- a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs +++ b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs @@ -147,7 +147,7 @@ impl<'a, T: BitPacking + NativePType> BitPackedSearch<'a, T> { let min_patch_offset = array .patches() .and_then(|p| { - SparseArray::try_from(p) + SparseArray::maybe_from(p) .vortex_expect("Only sparse patches are supported") .min_index() }) diff --git a/encodings/fastlanes/src/bitpacking/compute/slice.rs b/encodings/fastlanes/src/bitpacking/compute/slice.rs index a2be5e461c..8fef04b8d4 100644 --- a/encodings/fastlanes/src/bitpacking/compute/slice.rs +++ b/encodings/fastlanes/src/bitpacking/compute/slice.rs @@ -30,7 +30,7 @@ impl SliceFn for BitPackedEncoding { .filter(|a| { // If the sliced patch_indices is empty, we should not propagate the patches. // There may be other logic that depends on Some(patches) indicating non-empty. - !SparseArray::try_from(a.clone()) + !SparseArray::maybe_from(a.clone()) .vortex_expect("BitPackedArray must encode patches as SparseArray") .indices() .is_empty() diff --git a/vortex-array/src/array/extension/compute/compare.rs b/vortex-array/src/array/extension/compute/compare.rs index 9dbc1ecc8c..47e53be14b 100644 --- a/vortex-array/src/array/extension/compute/compare.rs +++ b/vortex-array/src/array/extension/compute/compare.rs @@ -2,7 +2,6 @@ use vortex_error::VortexResult; use crate::array::{ConstantArray, ExtensionArray, ExtensionEncoding}; use crate::compute::{compare, CompareFn, Operator}; -use crate::encoding::EncodingVTable; use crate::{ArrayData, ArrayLen}; impl CompareFn for ExtensionEncoding { @@ -24,8 +23,7 @@ impl CompareFn for ExtensionEncoding { } // If the RHS is an extension array matching ours, we can extract the storage. - if rhs.is_encoding(ExtensionEncoding.id()) { - let rhs_ext = ExtensionArray::try_from(rhs.clone())?; + if let Some(rhs_ext) = ExtensionArray::maybe_from(rhs.clone()) { return compare(lhs.storage(), rhs_ext.storage(), operator).map(Some); } diff --git a/vortex-array/src/macros.rs b/vortex-array/src/macros.rs index 9a363b2bdc..9a69549309 100644 --- a/vortex-array/src/macros.rs +++ b/vortex-array/src/macros.rs @@ -58,6 +58,14 @@ macro_rules! impl_encoding { stats )?) } + + /// Optionally downcast an [`ArrayData`](crate::ArrayData) instance to a specific encoding. + /// + /// Preferred in cases where a backtrace isn't needed, like when trying multiple encoding to go + /// down different code paths. + pub fn maybe_from(data: $crate::ArrayData) -> Option { + (data.encoding().id() == <[<$Name Encoding>] as $crate::encoding::Encoding>::ID).then_some(Self(data)) + } } impl TryFrom<$crate::ArrayData> for [<$Name Array>] { diff --git a/vortex-ipc/src/stream_writer/mod.rs b/vortex-ipc/src/stream_writer/mod.rs index b3f7210526..800c7fb607 100644 --- a/vortex-ipc/src/stream_writer/mod.rs +++ b/vortex-ipc/src/stream_writer/mod.rs @@ -2,8 +2,7 @@ use std::fmt::{Display, Formatter}; use std::ops::Range; use futures_util::{Stream, TryStreamExt}; -use vortex_array::array::{ChunkedArray, ChunkedEncoding}; -use vortex_array::encoding::EncodingVTable; +use vortex_array::array::ChunkedArray; use vortex_array::stream::ArrayStream; use vortex_array::ArrayData; use vortex_buffer::Buffer; @@ -83,9 +82,8 @@ impl StreamArrayWriter { } pub async fn write_array(self, array: ArrayData) -> VortexResult { - if array.is_encoding(ChunkedEncoding.id()) { - self.write_array_stream(ChunkedArray::try_from(array)?.array_stream()) - .await + if let Some(chunked_array) = ChunkedArray::maybe_from(array.clone()) { + self.write_array_stream(chunked_array.array_stream()).await } else { self.write_array_stream(array.into_array_stream()).await } diff --git a/vortex-sampling-compressor/src/compressors/alp.rs b/vortex-sampling-compressor/src/compressors/alp.rs index de3985029f..df1f506e01 100644 --- a/vortex-sampling-compressor/src/compressors/alp.rs +++ b/vortex-sampling-compressor/src/compressors/alp.rs @@ -26,7 +26,7 @@ impl EncodingCompressor for ALPCompressor { fn can_compress(&self, array: &ArrayData) -> Option<&dyn EncodingCompressor> { // Only support primitive arrays - let parray = PrimitiveArray::try_from(array.clone()).ok()?; + let parray = PrimitiveArray::maybe_from(array.clone())?; // Only supports f32 and f64 if !matches!(parray.ptype(), PType::F32 | PType::F64) { diff --git a/vortex-sampling-compressor/src/compressors/alp_rd.rs b/vortex-sampling-compressor/src/compressors/alp_rd.rs index 96f44473ca..410afce9f6 100644 --- a/vortex-sampling-compressor/src/compressors/alp_rd.rs +++ b/vortex-sampling-compressor/src/compressors/alp_rd.rs @@ -35,7 +35,7 @@ impl EncodingCompressor for ALPRDCompressor { fn can_compress(&self, array: &ArrayData) -> Option<&dyn EncodingCompressor> { // Only support primitive arrays - let parray = PrimitiveArray::try_from(array.clone()).ok()?; + let parray = PrimitiveArray::maybe_from(array.clone())?; // Only supports f32 and f64 if !matches!(parray.ptype(), PType::F32 | PType::F64) { diff --git a/vortex-sampling-compressor/src/compressors/bitpacked.rs b/vortex-sampling-compressor/src/compressors/bitpacked.rs index 97741a7eee..f1138bd509 100644 --- a/vortex-sampling-compressor/src/compressors/bitpacked.rs +++ b/vortex-sampling-compressor/src/compressors/bitpacked.rs @@ -54,7 +54,7 @@ impl EncodingCompressor for BitPackedCompressor { fn can_compress(&self, array: &ArrayData) -> Option<&dyn EncodingCompressor> { // Only support primitive arrays - let parray = PrimitiveArray::try_from(array.clone()).ok()?; + let parray = PrimitiveArray::maybe_from(array.clone())?; // Only supports unsigned ints if !parray.ptype().is_unsigned_int() { diff --git a/vortex-sampling-compressor/src/compressors/delta.rs b/vortex-sampling-compressor/src/compressors/delta.rs index 74c0124cf5..d5107be7b9 100644 --- a/vortex-sampling-compressor/src/compressors/delta.rs +++ b/vortex-sampling-compressor/src/compressors/delta.rs @@ -24,7 +24,7 @@ impl EncodingCompressor for DeltaCompressor { fn can_compress(&self, array: &ArrayData) -> Option<&dyn EncodingCompressor> { // Only support primitive arrays - let parray = PrimitiveArray::try_from(array.clone()).ok()?; + let parray = PrimitiveArray::maybe_from(array.clone())?; // Only supports ints if !parray.ptype().is_unsigned_int() { diff --git a/vortex-sampling-compressor/src/compressors/dict.rs b/vortex-sampling-compressor/src/compressors/dict.rs index 112861deef..9eb305cee0 100644 --- a/vortex-sampling-compressor/src/compressors/dict.rs +++ b/vortex-sampling-compressor/src/compressors/dict.rs @@ -54,16 +54,13 @@ impl EncodingCompressor for DictCompressor { like: Option>, ctx: SamplingCompressor<'a>, ) -> VortexResult> { - let (codes, values) = if array.is_encoding(PrimitiveEncoding::ID) { - let p = PrimitiveArray::try_from(array.clone())?; + let (codes, values) = if let Some(p) = PrimitiveArray::maybe_from(array.clone()) { let (codes, values) = dict_encode_primitive(&p); (codes.into_array(), values.into_array()) - } else if array.is_encoding(VarBinEncoding::ID) { - let vb = VarBinArray::try_from(array.clone())?; + } else if let Some(vb) = VarBinArray::maybe_from(array.clone()) { let (codes, values) = dict_encode_varbin(&vb); (codes.into_array(), values.into_array()) - } else if array.is_encoding(VarBinViewEncoding::ID) { - let vb = VarBinViewArray::try_from(array.clone())?; + } else if let Some(vb) = VarBinViewArray::maybe_from(array.clone()) { let (codes, values) = dict_encode_varbinview(&vb); (codes.into_array(), values.into_array()) } else { diff --git a/vortex-sampling-compressor/src/compressors/for.rs b/vortex-sampling-compressor/src/compressors/for.rs index f309787e62..ab088db409 100644 --- a/vortex-sampling-compressor/src/compressors/for.rs +++ b/vortex-sampling-compressor/src/compressors/for.rs @@ -26,7 +26,7 @@ impl EncodingCompressor for FoRCompressor { fn can_compress(&self, array: &ArrayData) -> Option<&dyn EncodingCompressor> { // Only support primitive arrays - let parray = PrimitiveArray::try_from(array.clone()).ok()?; + let parray = PrimitiveArray::maybe_from(array.clone())?; // Only supports integers if !parray.ptype().is_int() { diff --git a/vortex-sampling-compressor/src/compressors/zigzag.rs b/vortex-sampling-compressor/src/compressors/zigzag.rs index b5b3e0759c..8f60b4bf70 100644 --- a/vortex-sampling-compressor/src/compressors/zigzag.rs +++ b/vortex-sampling-compressor/src/compressors/zigzag.rs @@ -24,7 +24,7 @@ impl EncodingCompressor for ZigZagCompressor { fn can_compress(&self, array: &ArrayData) -> Option<&dyn EncodingCompressor> { // Only support primitive arrays - let parray = PrimitiveArray::try_from(array.clone()).ok()?; + let parray = PrimitiveArray::maybe_from(array.clone())?; // Only supports signed integers if !parray.ptype().is_signed_int() {