diff --git a/python/cudf/cudf/_lib/column.pxd b/python/cudf/cudf/_lib/column.pxd index 026c12895e8..58745d91fc0 100644 --- a/python/cudf/cudf/_lib/column.pxd +++ b/python/cudf/cudf/_lib/column.pxd @@ -13,7 +13,6 @@ from pylibcudf.libcudf.column.column_view cimport ( from pylibcudf.libcudf.types cimport size_type from rmm.librmm.device_buffer cimport device_buffer -cdef dtype_from_lists_column_view(column_view cv) cdef dtype_from_column_view(column_view cv) cdef class Column: @@ -42,5 +41,3 @@ cdef class Column: @staticmethod cdef Column from_column_view(column_view, object) - - cdef size_type compute_null_count(self) except? 0 diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index c59bbc0f40c..114991dbe3e 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -43,7 +43,6 @@ from pylibcudf.libcudf.column.column_factories cimport ( ) from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view -from pylibcudf.libcudf.null_mask cimport null_count as cpp_null_count from pylibcudf.libcudf.scalar.scalar cimport scalar from cudf._lib.scalar cimport DeviceScalar @@ -346,7 +345,15 @@ cdef class Column: @property def null_count(self): if self._null_count is None: - self._null_count = self.compute_null_count() + if not self.nullable or self.size == 0: + self._null_count = 0 + else: + with acquire_spill_lock(): + self._null_count = pylibcudf.null_mask.null_count( + self.base_mask.get_ptr(mode="read"), + self.offset, + self.offset + self.size + ) return self._null_count @property @@ -410,18 +417,6 @@ cdef class Column: else: return other_col - cdef libcudf_types.size_type compute_null_count(self) except? 0: - with acquire_spill_lock(): - if not self.nullable: - return 0 - return cpp_null_count( - ( - self.base_mask.get_ptr(mode="read") - ), - self.offset, - self.offset + self.size - ) - cdef mutable_column_view mutable_view(self) except *: if isinstance(self.dtype, cudf.CategoricalDtype): col = self.base_children[0] diff --git a/python/pylibcudf/pylibcudf/null_mask.pxd b/python/pylibcudf/pylibcudf/null_mask.pxd index 9bdfaee2842..779a5aed306 100644 --- a/python/pylibcudf/pylibcudf/null_mask.pxd +++ b/python/pylibcudf/pylibcudf/null_mask.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from pylibcudf.libcudf.types cimport mask_state, size_type @@ -16,3 +16,5 @@ cpdef DeviceBuffer create_null_mask(size_type size, mask_state state = *) cpdef tuple bitmask_and(list columns) cpdef tuple bitmask_or(list columns) + +cpdef size_type null_count(Py_ssize_t bitmask, size_type start, size_type stop) diff --git a/python/pylibcudf/pylibcudf/null_mask.pyi b/python/pylibcudf/pylibcudf/null_mask.pyi index 1a6d96a0822..ace18582bd1 100644 --- a/python/pylibcudf/pylibcudf/null_mask.pyi +++ b/python/pylibcudf/pylibcudf/null_mask.pyi @@ -12,3 +12,4 @@ def create_null_mask( ) -> DeviceBuffer: ... def bitmask_and(columns: list[Column]) -> tuple[DeviceBuffer, int]: ... def bitmask_or(columns: list[Column]) -> tuple[DeviceBuffer, int]: ... +def null_count(bitmask: int, start: int, stop: int) -> int: ... diff --git a/python/pylibcudf/pylibcudf/null_mask.pyx b/python/pylibcudf/pylibcudf/null_mask.pyx index adc264e9af6..0260088c0e2 100644 --- a/python/pylibcudf/pylibcudf/null_mask.pyx +++ b/python/pylibcudf/pylibcudf/null_mask.pyx @@ -1,10 +1,11 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport make_unique from libcpp.pair cimport pair from libcpp.utility cimport move from pylibcudf.libcudf cimport null_mask as cpp_null_mask from pylibcudf.libcudf.types cimport mask_state, size_type +from pylibcudf.utils cimport int_to_bitmask_ptr from rmm.librmm.device_buffer cimport device_buffer from rmm.pylibrmm.device_buffer cimport DeviceBuffer @@ -20,6 +21,7 @@ __all__ = [ "bitmask_or", "copy_bitmask", "create_null_mask", + "null_count", ] cdef DeviceBuffer buffer_to_python(device_buffer buf): @@ -148,3 +150,26 @@ cpdef tuple bitmask_or(list columns): c_result = cpp_null_mask.bitmask_or(c_table.view()) return buffer_to_python(move(c_result.first)), c_result.second + + +cpdef size_type null_count(Py_ssize_t bitmask, size_type start, size_type stop): + """Given a validity bitmask, counts the number of null elements. + + For details, see :cpp:func:`null_count`. + + Parameters + ---------- + bitmask : int + Integer pointer to the bitmask. + start : int + Index of the first bit to count (inclusive). + stop : int + Index of the last bit to count (exclusive). + + Returns + ------- + int + The number of null elements in the specified range. + """ + with nogil: + return cpp_null_mask.null_count(int_to_bitmask_ptr(bitmask), start, stop)