Skip to content

Commit

Permalink
feat: compare array metadata against checked in goldenfiles (#1812)
Browse files Browse the repository at this point in the history
This is part of a general epic (is that word taboo?) around stability. I
have a separate changeset that stores an entire Vortex file and checks
for round-trip compatibility.

However, it's also helpful to know on a per-encoding level that the
metadata format is stable.

This PR adds [goldenfile](https://crates.io/crates/goldenfile) tests for
all encoding metadata. This is a bit simplistic: we don't check for
backward/forward compatibility, this is just an attempt to make it
clearer on PRs when changes are made to the metadata format.

Now when changes are made to Array metadata, you should re-run the test
with

```
UPDATE_GOLDENFILES=1 cargo test -p ...
```

and that will update the checked-in metadatas.



Roughly half of the changed files are checked-in snapshots, most of the
other half are simple test definitions. The only serious change is the
addition of the `check_metadata` helper in test-utils.
  • Loading branch information
a10y authored Jan 6, 2025
1 parent 98ac114 commit 1f894d3
Show file tree
Hide file tree
Showing 66 changed files with 496 additions and 34 deletions.
53 changes: 50 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ futures = { version = "0.3", default-features = false }
futures-executor = "0.3"
futures-util = "0.3"
getrandom = "0.2.14"
goldenfile = "1"
half = { version = "^2", features = ["std", "num-traits"] }
hashbrown = "0.15.0"
homedir = "0.3.3"
Expand Down
2 changes: 1 addition & 1 deletion encodings/alp/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ vortex-fastlanes = { workspace = true }
vortex-scalar = { workspace = true }

[dev-dependencies]
arrow-array = { workspace = true }
divan = { workspace = true }
rstest = { workspace = true }
vortex-array = { path = "../../vortex-array", features = ["test_util"] }

[[bench]]
name = "alp_compress"
Expand Down
Binary file added encodings/alp/goldenfiles/alp.metadata
Binary file not shown.
Binary file added encodings/alp/goldenfiles/alprd.metadata
Binary file not shown.
28 changes: 26 additions & 2 deletions encodings/alp/src/alp/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ impl_encoding!("vortex.alp", ids::ALP, ALP);

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ALPMetadata {
exponents: Exponents,
patches: Option<PatchesMetadata>,
pub(crate) exponents: Exponents,
pub(crate) patches: Option<PatchesMetadata>,
}

impl Display for ALPMetadata {
Expand Down Expand Up @@ -150,3 +150,27 @@ impl VisitorVTable<ALPArray> for ALPEncoding {
}

impl StatisticsVTable<ALPArray> for ALPEncoding {}

#[cfg(test)]
mod tests {
use vortex_array::patches::PatchesMetadata;
use vortex_array::test_utils::check_metadata;
use vortex_dtype::PType;

use crate::{ALPMetadata, Exponents};

#[cfg_attr(miri, ignore)]
#[test]
fn test_alp_metadata() {
check_metadata(
"alp.metadata",
ALPMetadata {
patches: Some(PatchesMetadata::new(usize::MAX, PType::U64)),
exponents: Exponents {
e: u8::MAX,
f: u8::MAX,
},
},
);
}
}
20 changes: 19 additions & 1 deletion encodings/alp/src/alp_rd/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,9 +258,27 @@ impl ArrayTrait for ALPRDArray {}
mod test {
use rstest::rstest;
use vortex_array::array::PrimitiveArray;
use vortex_array::patches::PatchesMetadata;
use vortex_array::test_utils::check_metadata;
use vortex_array::{IntoArrayData, IntoCanonical};
use vortex_dtype::PType;

use crate::{alp_rd, ALPRDFloat};
use crate::{alp_rd, ALPRDFloat, ALPRDMetadata};

#[cfg_attr(miri, ignore)]
#[test]
fn test_alprd_metadata() {
check_metadata(
"alprd.metadata",
ALPRDMetadata {
right_bit_width: u8::MAX,
patches: Some(PatchesMetadata::new(usize::MAX, PType::U64)),
dict: [0u16; 8],
left_parts_ptype: PType::U64,
dict_len: 8,
},
);
}

#[rstest]
#[case(vec![0.1f32.next_up(); 1024], 1.123_848_f32)]
Expand Down
3 changes: 3 additions & 0 deletions encodings/bytebool/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@ vortex-buffer = { workspace = true }
vortex-dtype = { workspace = true }
vortex-error = { workspace = true }
vortex-scalar = { workspace = true }

[dev-dependencies]
vortex-array = { path = "../../vortex-array", features = ["test_util"] }
Binary file added encodings/bytebool/goldenfiles/bytebool.metadata
Binary file not shown.
12 changes: 12 additions & 0 deletions encodings/bytebool/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,22 @@ impl VisitorVTable<ByteBoolArray> for ByteBoolEncoding {

#[cfg(test)]
mod tests {
use vortex_array::test_utils::check_metadata;
use vortex_array::validity::ArrayValidity;

use super::*;

#[cfg_attr(miri, ignore)]
#[test]
fn test_bytebool_metadata() {
check_metadata(
"bytebool.metadata",
ByteBoolMetadata {
validity: ValidityMetadata::AllValid,
},
);
}

#[test]
fn test_validity_construction() {
let v = vec![true, false];
Expand Down
4 changes: 3 additions & 1 deletion encodings/datetime-parts/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@ readme = { workspace = true }
workspace = true

[dependencies]
itertools = { workspace = true }
serde = { workspace = true, features = ["derive"] }
vortex-array = { workspace = true }
vortex-buffer = { workspace = true }
vortex-datetime-dtype = { workspace = true }
vortex-dtype = { workspace = true }
vortex-error = { workspace = true }
vortex-scalar = { workspace = true }

[dev-dependencies]
vortex-array = { path = "../../vortex-array", features = ["test_util"] }
Binary file not shown.
21 changes: 21 additions & 0 deletions encodings/datetime-parts/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,24 @@ impl VisitorVTable<DateTimePartsArray> for DateTimePartsEncoding {
visitor.visit_child("subsecond", &array.subsecond())
}
}

#[cfg(test)]
mod test {
use vortex_array::test_utils::check_metadata;
use vortex_dtype::PType;

use crate::DateTimePartsMetadata;

#[cfg_attr(miri, ignore)]
#[test]
fn test_datetimeparts_metadata() {
check_metadata(
"datetimeparts.metadata",
DateTimePartsMetadata {
days_ptype: PType::I64,
seconds_ptype: PType::I64,
subseconds_ptype: PType::I64,
},
);
}
}
1 change: 1 addition & 0 deletions encodings/dict/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ workspace = true
[dev-dependencies]
criterion = { workspace = true }
rand = { workspace = true }
vortex-array = { path = "../../vortex-array", features = ["test_util"] }

[[bench]]
name = "dict_compress"
Expand Down
Binary file added encodings/dict/goldenfiles/dict.metadata
Binary file not shown.
20 changes: 20 additions & 0 deletions encodings/dict/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,23 @@ impl VisitorVTable<DictArray> for DictEncoding {
visitor.visit_child("codes", &array.codes())
}
}

#[cfg(test)]
mod test {
use vortex_array::test_utils::check_metadata;
use vortex_dtype::PType;

use crate::DictMetadata;

#[cfg_attr(miri, ignore)]
#[test]
fn test_dict_metadata() {
check_metadata(
"dict.metadata",
DictMetadata {
codes_ptype: PType::U64,
values_len: usize::MAX,
},
);
}
}
1 change: 1 addition & 0 deletions encodings/fastlanes/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ vortex-scalar = { workspace = true }
[dev-dependencies]
criterion = { workspace = true }
rand = { workspace = true }
vortex-array = { path = "../../vortex-array", features = ["test_util"] }

[[bench]]
name = "bitpacking_take"
Expand Down
Binary file added encodings/fastlanes/goldenfiles/bitpacked.metadata
Binary file not shown.
Binary file added encodings/fastlanes/goldenfiles/delta.metadata
Binary file not shown.
Binary file added encodings/fastlanes/goldenfiles/for.metadata
Binary file not shown.
20 changes: 19 additions & 1 deletion encodings/fastlanes/src/bitpacking/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -292,10 +292,28 @@ impl PrimitiveArrayTrait for BitPackedArray {}
#[cfg(test)]
mod test {
use vortex_array::array::PrimitiveArray;
use vortex_array::patches::PatchesMetadata;
use vortex_array::test_utils::check_metadata;
use vortex_array::validity::ValidityMetadata;
use vortex_array::{IntoArrayData, IntoArrayVariant, IntoCanonical};
use vortex_buffer::Buffer;
use vortex_dtype::PType;

use crate::BitPackedArray;
use crate::{BitPackedArray, BitPackedMetadata};

#[cfg_attr(miri, ignore)]
#[test]
fn test_bitpacked_metadata() {
check_metadata(
"bitpacked.metadata",
BitPackedMetadata {
patches: Some(PatchesMetadata::new(usize::MAX, PType::U64)),
validity: ValidityMetadata::AllValid,
offset: u16::MAX,
bit_width: u8::MAX,
},
);
}

#[test]
fn test_encode() {
Expand Down
21 changes: 21 additions & 0 deletions encodings/fastlanes/src/delta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,24 @@ impl VisitorVTable<DeltaArray> for DeltaEncoding {
}

impl StatisticsVTable<DeltaArray> for DeltaEncoding {}

#[cfg(test)]
mod test {
use vortex_array::test_utils::check_metadata;
use vortex_array::validity::ValidityMetadata;

use crate::DeltaMetadata;

#[cfg_attr(miri, ignore)]
#[test]
fn test_delta_metadata() {
check_metadata(
"delta.metadata",
DeltaMetadata {
offset: u16::MAX,
validity: ValidityMetadata::AllValid,
deltas_len: u64::MAX,
},
);
}
}
20 changes: 20 additions & 0 deletions encodings/fastlanes/src/for/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,23 @@ impl VariantsVTable<FoRArray> for FoREncoding {
}

impl PrimitiveArrayTrait for FoRArray {}

#[cfg(test)]
mod test {
use vortex_array::test_utils::check_metadata;
use vortex_scalar::PValue;

use crate::FoRMetadata;

#[cfg_attr(miri, ignore)]
#[test]
fn test_for_metadata() {
check_metadata(
"for.metadata",
FoRMetadata {
reference: PValue::I64(i64::MAX),
shift: u8::MAX,
},
);
}
}
Loading

0 comments on commit 1f894d3

Please sign in to comment.