Skip to content

Commit

Permalink
Add pylibcudf.null_mask.null_count (#17711)
Browse files Browse the repository at this point in the history
A small step toward decoupling `null_count` from `cudf._lib.column.Column`.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Matthew Murray (https://github.com/Matt711)

URL: #17711
  • Loading branch information
mroeschke authored Jan 14, 2025
1 parent 41215e2 commit e0dac5d
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 19 deletions.
3 changes: 0 additions & 3 deletions python/cudf/cudf/_lib/column.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ from pylibcudf.libcudf.column.column_view cimport (
from pylibcudf.libcudf.types cimport size_type
from rmm.librmm.device_buffer cimport device_buffer

cdef dtype_from_lists_column_view(column_view cv)
cdef dtype_from_column_view(column_view cv)

cdef class Column:
Expand Down Expand Up @@ -42,5 +41,3 @@ cdef class Column:

@staticmethod
cdef Column from_column_view(column_view, object)

cdef size_type compute_null_count(self) except? 0
23 changes: 9 additions & 14 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ from pylibcudf.libcudf.column.column_factories cimport (
)
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
from pylibcudf.libcudf.null_mask cimport null_count as cpp_null_count
from pylibcudf.libcudf.scalar.scalar cimport scalar

from cudf._lib.scalar cimport DeviceScalar
Expand Down Expand Up @@ -346,7 +345,15 @@ cdef class Column:
@property
def null_count(self):
if self._null_count is None:
self._null_count = self.compute_null_count()
if not self.nullable or self.size == 0:
self._null_count = 0
else:
with acquire_spill_lock():
self._null_count = pylibcudf.null_mask.null_count(
self.base_mask.get_ptr(mode="read"),
self.offset,
self.offset + self.size
)
return self._null_count

@property
Expand Down Expand Up @@ -410,18 +417,6 @@ cdef class Column:
else:
return other_col

cdef libcudf_types.size_type compute_null_count(self) except? 0:
with acquire_spill_lock():
if not self.nullable:
return 0
return cpp_null_count(
<libcudf_types.bitmask_type*><uintptr_t>(
self.base_mask.get_ptr(mode="read")
),
self.offset,
self.offset + self.size
)

cdef mutable_column_view mutable_view(self) except *:
if isinstance(self.dtype, cudf.CategoricalDtype):
col = self.base_children[0]
Expand Down
4 changes: 3 additions & 1 deletion python/pylibcudf/pylibcudf/null_mask.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

from pylibcudf.libcudf.types cimport mask_state, size_type

Expand All @@ -16,3 +16,5 @@ cpdef DeviceBuffer create_null_mask(size_type size, mask_state state = *)
cpdef tuple bitmask_and(list columns)

cpdef tuple bitmask_or(list columns)

cpdef size_type null_count(Py_ssize_t bitmask, size_type start, size_type stop)
1 change: 1 addition & 0 deletions python/pylibcudf/pylibcudf/null_mask.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ def create_null_mask(
) -> DeviceBuffer: ...
def bitmask_and(columns: list[Column]) -> tuple[DeviceBuffer, int]: ...
def bitmask_or(columns: list[Column]) -> tuple[DeviceBuffer, int]: ...
def null_count(bitmask: int, start: int, stop: int) -> int: ...
27 changes: 26 additions & 1 deletion python/pylibcudf/pylibcudf/null_mask.pyx
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

from libcpp.memory cimport make_unique
from libcpp.pair cimport pair
from libcpp.utility cimport move
from pylibcudf.libcudf cimport null_mask as cpp_null_mask
from pylibcudf.libcudf.types cimport mask_state, size_type
from pylibcudf.utils cimport int_to_bitmask_ptr

from rmm.librmm.device_buffer cimport device_buffer
from rmm.pylibrmm.device_buffer cimport DeviceBuffer
Expand All @@ -20,6 +21,7 @@ __all__ = [
"bitmask_or",
"copy_bitmask",
"create_null_mask",
"null_count",
]

cdef DeviceBuffer buffer_to_python(device_buffer buf):
Expand Down Expand Up @@ -148,3 +150,26 @@ cpdef tuple bitmask_or(list columns):
c_result = cpp_null_mask.bitmask_or(c_table.view())

return buffer_to_python(move(c_result.first)), c_result.second


cpdef size_type null_count(Py_ssize_t bitmask, size_type start, size_type stop):
    """Count the null elements within a range of a validity bitmask.

    For details, see :cpp:func:`null_count`.

    Parameters
    ----------
    bitmask : int
        Integer pointer to the bitmask.
    start : int
        Index of the first bit to count (inclusive).
    stop : int
        Index one past the last bit to count (exclusive).

    Returns
    -------
    int
        The number of null elements in the specified range.
    """
    cdef size_type c_result

    # Release the GIL for the duration of the libcudf call.
    with nogil:
        c_result = cpp_null_mask.null_count(
            int_to_bitmask_ptr(bitmask), start, stop
        )

    return c_result

0 comments on commit e0dac5d

Please sign in to comment.