Skip to content

Commit

Permalink
feat: teach PyArray scalar_at (#1095)
Browse files Browse the repository at this point in the history
I intentionally do not implement __getitem__ because I need to think
harder about the implications of that. For example, do we ever want
__getitem__ on an Array to get a child array of a StructArray?
  • Loading branch information
danking authored Oct 22, 2024
1 parent 75456b6 commit ed138f8
Show file tree
Hide file tree
Showing 9 changed files with 374 additions and 4 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ Vortex is an Apache Arrow-compatible toolkit for working with compressed array d
dtype
io
expr
scalar
6 changes: 6 additions & 0 deletions docs/scalar.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Scalar Values
=============

.. automodule:: vortex.scalar
:members:
:imported-members:
1 change: 1 addition & 0 deletions pyvortex/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ tokio = { workspace = true, features = ["fs"] }

vortex-alp = { workspace = true }
vortex-array = { workspace = true }
vortex-buffer = { workspace = true }
vortex-bytebool = { workspace = true }
vortex-datetime-parts = { workspace = true }
vortex-dict = { workspace = true }
Expand Down
4 changes: 2 additions & 2 deletions pyvortex/python/vortex/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from . import encoding
from ._lib import __doc__ as module_docs
from ._lib import dtype, expr, io
from ._lib import dtype, expr, io, scalar

__doc__ = module_docs
del module_docs
array = encoding.array

__all__ = ["array", dtype, expr, io, encoding]
# Every entry of __all__ must be a string naming a public attribute; listing the
# module objects themselves makes `from vortex import *` raise TypeError.
__all__ = ["array", "dtype", "expr", "io", "encoding", "scalar"]
86 changes: 84 additions & 2 deletions pyvortex/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@ use arrow::array::{Array as ArrowArray, ArrayRef};
use arrow::pyarrow::ToPyArrow;
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::types::{IntoPyDict, PyList};
use pyo3::types::{IntoPyDict, PyInt, PyList};
use vortex::array::ChunkedArray;
use vortex::compute::unary::fill_forward;
use vortex::compute::unary::{fill_forward, scalar_at};
use vortex::compute::{compare, slice, take, Operator};
use vortex::{Array, ArrayDType, IntoCanonical};

use crate::dtype::PyDType;
use crate::error::PyVortexError;
use crate::python_repr::PythonRepr;
use crate::scalar::scalar_into_py;

#[pyclass(name = "Array", module = "vortex", sequence, subclass)]
/// An array of zero or more *rows* each with the same set of *columns*.
Expand Down Expand Up @@ -326,6 +327,87 @@ impl PyArray {
.map(|arr| PyArray { inner: arr })
}

/// Retrieve a row by its index.
///
/// Parameters
/// ----------
/// index : :class:`int`
/// The index of interest. Must be greater than or equal to zero and less than the length of
/// this array.
///
/// Returns
/// -------
/// one of :class:`int`, :class:`float`, :class:`bool`, :class:`vortex.scalar.Buffer`, :class:`vortex.scalar.BufferString`, :class:`vortex.scalar.VortexList`, :class:`vortex.scalar.VortexStruct`
/// If this array contains numbers or Booleans, this method returns the corresponding
/// primitive Python type, i.e. int, float, and bool. For structures and variable-length
/// data types, a zero-copy view of the underlying data is returned.
///
/// Examples
/// --------
///
/// Retrieve the last element from an array of integers:
///
/// >>> vortex.encoding.array([10, 42, 999, 1992]).scalar_at(3)
/// 1992
///
/// Retrieve the third element from an array of strings:
///
/// >>> array = vortex.encoding.array(["hello", "goodbye", "it", "is"])
/// >>> array.scalar_at(2)
/// <vortex.BufferString ...>
///
/// Vortex, by default, returns a view into the array's data. This avoids copying the data,
/// which can be expensive if done repeatedly. :meth:`.BufferString.into_python` forcibly copies
/// the scalar data into a Python data structure.
///
/// >>> array.scalar_at(2).into_python()
/// 'it'
///
/// Retrieve an element from an array of structures:
///
/// >>> array = vortex.encoding.array([
/// ... {'name': 'Joseph', 'age': 25},
/// ... {'name': 'Narendra', 'age': 31},
/// ... {'name': 'Angela', 'age': 33},
/// ... None,
/// ... {'name': 'Mikhail', 'age': 57},
/// ... ])
/// >>> array.scalar_at(2).into_python()
/// {'age': 33, 'name': <vortex.BufferString ...>}
///
/// Notice that :meth:`.VortexStruct.into_python` only copies one "layer" of data into
/// Python. If we want to ensure the entire structure is recursively copied into Python we can
/// specify ``recursive=True``:
///
/// >>> array.scalar_at(2).into_python(recursive=True)
/// {'age': 33, 'name': 'Angela'}
///
/// Retrieve a missing element from an array of structures:
///
/// >>> array.scalar_at(3) is None
/// True
///
/// Out of bounds accesses are prohibited:
///
/// >>> vortex.encoding.array([10, 42, 999, 1992]).scalar_at(10)
/// Traceback (most recent call last):
/// ...
/// ValueError: index 10 out of bounds from 0 to 4
/// ...
///
/// Unlike Python, negative indices are not supported:
///
/// >>> vortex.encoding.array([10, 42, 999, 1992]).scalar_at(-2)
/// Traceback (most recent call last):
/// ...
/// OverflowError: can't convert negative int to unsigned
///
fn scalar_at(&self, index: &Bound<PyInt>) -> PyResult<PyObject> {
    // Convert the Python integer first; a failed conversion (e.g. a negative value) is
    // returned to the caller unchanged.
    let position = index.extract()?;
    // Fetch the scalar, translating any Vortex error into a Python exception.
    let scalar = scalar_at(&self.inner, position).map_err(PyVortexError::map_err)?;
    // NOTE(review): the trailing `false` is presumably the `recursive` flag (shallow
    // conversion) -- confirm against `scalar_into_py`.
    scalar_into_py(index.py(), scalar, false)
}

/// Filter, permute, and/or repeat elements by their index.
///
/// Parameters
Expand Down
10 changes: 10 additions & 0 deletions pyvortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
use array::PyArray;
use expr::PyExpr;
use pyo3::prelude::*;
use scalar::{PyBuffer, PyBufferString, PyVortexList, PyVortexStruct};

mod array;
mod compress;
Expand All @@ -12,6 +13,7 @@ mod error;
mod expr;
mod io;
mod python_repr;
mod scalar;

/// Vortex is an Apache Arrow-compatible toolkit for working with compressed array data.
#[pymodule]
Expand Down Expand Up @@ -50,5 +52,13 @@ fn _lib(py: Python, m: &Bound<PyModule>) -> PyResult<()> {
expr.add_function(wrap_pyfunction!(expr::column, m)?)?;
expr.add_class::<PyExpr>()?;

let scalar = PyModule::new_bound(py, "scalar")?;
m.add_submodule(&scalar)?;

scalar.add_class::<PyBuffer>()?;
scalar.add_class::<PyBufferString>()?;
scalar.add_class::<PyVortexList>()?;
scalar.add_class::<PyVortexStruct>()?;

Ok(())
}
Loading

0 comments on commit ed138f8

Please sign in to comment.