rapidsai · PointKernel · Jan 9, 2025 · Jan 10, 2025 · Jan 10, 2025
@@ -73,7 +73,7 @@ CUDF_HOST_DEVICE constexpr S round_up_safe(S number_to_round, S modulus)
  * `modulus` is positive and does not check for overflow.
  */
 template <typename S>
-constexpr S round_down_safe(S number_to_round, S modulus) noexcept
+CUDF_HOST_DEVICE constexpr S round_down_safe(S number_to_round, S modulus) noexcept
 {
   auto remainder    = number_to_round % modulus;
   auto rounded_down = number_to_round - remainder;
@@ -113,24 +113,26 @@ CUDF_HOST_DEVICE constexpr S round_up_unsafe(S number_to_round, S modulus) noexc
  * the result will be incorrect
  */
 template <typename S, typename T>
-constexpr S div_rounding_up_unsafe(S const& dividend, T const& divisor) noexcept
+CUDF_HOST_DEVICE constexpr S div_rounding_up_unsafe(S const& dividend, T const& divisor) noexcept
 {
   return (dividend + divisor - 1) / divisor;
 }
 
 namespace detail {
 template <typename I>
-constexpr I div_rounding_up_safe(std::integral_constant<bool, false>,
-                                 I dividend,
-                                 I divisor) noexcept
+CUDF_HOST_DEVICE constexpr I div_rounding_up_safe(std::integral_constant<bool, false>,
+                                                  I dividend,
+                                                  I divisor) noexcept
 {
   // TODO: This could probably be implemented faster
   return (dividend > divisor) ? 1 + div_rounding_up_unsafe(dividend - divisor, divisor)
                               : (dividend > 0);
 }
 
 template <typename I>
-constexpr I div_rounding_up_safe(std::integral_constant<bool, true>, I dividend, I divisor) noexcept
+CUDF_HOST_DEVICE constexpr I div_rounding_up_safe(std::integral_constant<bool, true>,
+                                                  I dividend,
+                                                  I divisor) noexcept
 {
   auto quotient  = dividend / divisor;
   auto remainder = dividend % divisor;

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -54,7 +54,7 @@ auto to_string(T value) -> std::string
 }
 
 template <typename T>
-constexpr auto abs(T value)
+CUDF_HOST_DEVICE constexpr auto abs(T value)
 {
   return value >= 0 ? value : -value;
 }
@@ -72,7 +72,7 @@ CUDF_HOST_DEVICE inline auto max(T lhs, T rhs)
 }
 
 template <typename BaseType>
-constexpr auto exp10(int32_t exponent)
+CUDF_HOST_DEVICE constexpr auto exp10(int32_t exponent)
 {
   BaseType value = 1;
   while (exponent > 0)

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,6 +18,8 @@
 
 #include <cudf/utilities/export.hpp>
 
+#include <cuda/functional>
+
 #include <cstdint>
 
 namespace CUDF_EXPORT cudf {
@@ -45,7 +47,7 @@ struct multistate {
    *
    * @note: The behavior of this function is undefined if size() >= max_segment_count
    */
-  constexpr void enqueue(uint8_t head, uint8_t tail)
+  CUDF_HOST_DEVICE constexpr void enqueue(uint8_t head, uint8_t tail)
   {
     _heads |= (head & 0xFu) << (_size * 4);
     _tails |= (tail & 0xFu) << (_size * 4);
@@ -55,17 +57,17 @@ struct multistate {
   /**
    * @brief gets the number of segments this multistate represents
    */
-  [[nodiscard]] constexpr uint8_t size() const { return _size; }
+  [[nodiscard]] CUDF_HOST_DEVICE constexpr uint8_t size() const { return _size; }
 
   /**
    * @brief gets the highest (____, tail] value this multistate represents
    */
-  [[nodiscard]] constexpr uint8_t max_tail() const
+  [[nodiscard]] CUDF_HOST_DEVICE constexpr uint8_t max_tail() const
   {
     uint8_t maximum = 0;
 
     for (uint8_t i = 0; i < _size; i++) {
-      maximum = std::max(maximum, get_tail(i));
+      maximum = cuda::std::max(maximum, get_tail(i));
     }
 
     return maximum;

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 
 #include <cudf/fixed_point/temporary.hpp>
 
+#include <cuda/std/limits>
 #include <cuda/std/optional>
 #include <cuda/std/type_traits>
 #include <thrust/pair.h>
@@ -46,7 +47,7 @@ __device__ inline thrust::pair<UnsignedDecimalType, int32_t> parse_integer(
   // highest value where another decimal digit cannot be appended without an overflow;
   // this preserves the most digits when scaling the final result for this type
   constexpr UnsignedDecimalType decimal_max =
-    (std::numeric_limits<UnsignedDecimalType>::max() - 9L) / 10L;
+    (cuda::std::numeric_limits<UnsignedDecimalType>::max() - 9L) / 10L;
 
   __uint128_t value  = 0;  // for checking overflow
   int32_t exp_offset = 0;
@@ -90,7 +91,8 @@ __device__ inline thrust::pair<UnsignedDecimalType, int32_t> parse_integer(
 template <bool check_only = false>
 __device__ cuda::std::optional<int32_t> parse_exponent(char const* iter, char const* iter_end)
 {
-  constexpr uint32_t exponent_max = static_cast<uint32_t>(std::numeric_limits<int32_t>::max());
+  constexpr uint32_t exponent_max =
+    static_cast<uint32_t>(cuda::std::numeric_limits<int32_t>::max());
 
   // get optional exponent sign
   int32_t const exp_sign = [&iter] {

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,8 @@
 
 #include <cudf/strings/detail/convert/int_to_string.cuh>
 
+#include <cuda/std/functional>
+
 namespace cudf::strings::detail {
 
 /**
@@ -33,7 +35,7 @@ __device__ inline int32_t fixed_point_string_size(__int128_t const& value, int32
   auto const abs_value = numeric::detail::abs(value);
   auto const exp_ten   = numeric::detail::exp10<__int128_t>(-scale);
   auto const fraction  = count_digits(abs_value % exp_ten);
-  auto const num_zeros = std::max(0, (-scale - fraction));
+  auto const num_zeros = cuda::std::max(0, (-scale - fraction));
   return static_cast<int32_t>(value < 0) +    // sign if negative
          count_digits(abs_value / exp_ten) +  // integer
          1 +                                  // decimal point
@@ -66,7 +68,7 @@ __device__ inline void fixed_point_to_string(__int128_t const& value, int32_t sc
   if (value < 0) *out_ptr++ = '-';  // add sign
   auto const abs_value = numeric::detail::abs(value);
   auto const exp_ten   = numeric::detail::exp10<__int128_t>(-scale);
-  auto const num_zeros = std::max(0, (-scale - count_digits(abs_value % exp_ten)));
+  auto const num_zeros = cuda::std::max(0, (-scale - count_digits(abs_value % exp_ten)));
 
   out_ptr += integer_to_string(abs_value / exp_ten, out_ptr);  // add the integer part
   *out_ptr++ = '.';                                            // add decimal point

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -67,7 +67,7 @@ __device__ inline size_type integer_to_string(IntegerType value, char* d_buffer)
  * @return size_type number of digits in input value
  */
 template <typename IntegerType>
-constexpr size_type count_digits(IntegerType value)
+__device__ constexpr size_type count_digits(IntegerType value)
 {
   if (value == 0) return 1;
   bool const is_negative = cuda::std::is_signed<IntegerType>() ? (value < 0) : false;

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -129,13 +129,15 @@ std::pair<std::unique_ptr<column>, int64_t> make_offsets_child_column(
     data_type{type_id::INT32}, strings_count + 1, mask_state::UNALLOCATED, stream, mr);
   auto d_offsets = offsets_column->mutable_view().template data<int32_t>();
 
+  auto const data = device_span<cudf::size_type>{begin, static_cast<std::size_t>(lcount)};
+
   // The number of offsets is strings_count+1 so to build the offsets from the sizes
   // using exclusive-scan technically requires strings_count+1 input values even though
   // the final input value is never used.
   // The input iterator is wrapped here to allow the 'last value' to be safely read.
   auto map_fn = cuda::proclaim_return_type<size_type>(
-    [begin, strings_count] __device__(size_type idx) -> size_type {
-      return idx < strings_count ? static_cast<size_type>(begin[idx]) : size_type{0};
+    [data, strings_count] __device__(size_type idx) -> size_type {
+      return idx < strings_count ? data[idx] : size_type{0};
     });
   auto input_itr = cudf::detail::make_counting_transform_iterator(0, map_fn);
   // Use the sizes-to-offsets iterator to compute the total number of elements

@@ -278,7 +278,10 @@ struct host_span : public cudf::detail::span_base<T, Extent, host_span<T, Extent
    * @param idx the index of the element to access
    * @return A reference to the idx-th element of the sequence, i.e., `data()[idx]`
    */
-  constexpr typename base::reference operator[](size_type idx) const { return this->_data[idx]; }
+  CUDF_HOST_DEVICE constexpr typename base::reference operator[](size_type idx) const
+  {
+    return this->_data[idx];
+  }
 
   // not noexcept due to undefined behavior when size = 0
   /**

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -197,7 +197,7 @@ __inline__ __device__ cuda::std::chrono::hh_mm_ss<duration_ms> extract_time_of_d
 /**
  * @brief Checks whether `c` is decimal digit
  */
-constexpr bool is_digit(char c) { return c >= '0' and c <= '9'; }
+__device__ constexpr bool is_digit(char c) { return c >= '0' and c <= '9'; }
 
 /**
  * @brief Parses a datetime string and computes the corresponding timestamp.

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -707,7 +707,7 @@ struct orc_column_device_view : public column_device_view {
 struct rowgroup_rows {
   size_type begin;
   size_type end;
-  [[nodiscard]] constexpr auto size() const noexcept { return end - begin; }
+  [[nodiscard]] CUDF_HOST_DEVICE constexpr auto size() const noexcept { return end - begin; }
 };
 
 }  // namespace orc

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -211,7 +211,7 @@ __device__ inline uint8_t* pb_put_fixed64(uint8_t* p, uint32_t id, void const* r
 }
 
 // Splits a nanosecond timestamp into milliseconds and nanoseconds
-__device__ std::pair<int64_t, int32_t> split_nanosecond_timestamp(int64_t nano_count)
+__device__ cuda::std::pair<int64_t, int32_t> split_nanosecond_timestamp(int64_t nano_count)
 {
   auto const ns           = cuda::std::chrono::nanoseconds(nano_count);
   auto const ms_floor     = cuda::std::chrono::floor<cuda::std::chrono::milliseconds>(ns);

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -35,6 +35,7 @@
 #include <rmm/exec_policy.hpp>
 
 #include <cub/cub.cuh>
+#include <cuda/std/limits>
 #include <thrust/for_each.h>
 #include <thrust/iterator/zip_iterator.h>
 #include <thrust/transform.h>
@@ -416,8 +417,8 @@ static __device__ uint32_t IntegerRLE(
     // Find minimum and maximum values
     if (literal_run > 0) {
       // Find min & max
-      T vmin = (t < literal_run) ? v0 : std::numeric_limits<T>::max();
-      T vmax = (t < literal_run) ? v0 : std::numeric_limits<T>::min();
+      T vmin = (t < literal_run) ? v0 : cuda::std::numeric_limits<T>::max();
+      T vmax = (t < literal_run) ? v0 : cuda::std::numeric_limits<T>::min();
       uint32_t literal_mode, literal_w;
       vmin = block_reduce(temp_storage).Reduce(vmin, cub::Min());
       __syncthreads();
@@ -451,7 +452,7 @@ static __device__ uint32_t IntegerRLE(
         } else {
           uint32_t range, w;
           // Mode 2 base value cannot be bigger than max int64_t, i.e. the first bit has to be 0
-          if (vmin <= std::numeric_limits<int64_t>::max() and mode1_w > mode2_w and
+          if (vmin <= cuda::std::numeric_limits<int64_t>::max() and mode1_w > mode2_w and
               (literal_run - 1) * (mode1_w - mode2_w) > 4) {
             s->u.intrle.literal_mode = 2;
             w                        = mode2_w;

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@
 #include <rmm/cuda_stream_view.hpp>
 
 #include <cub/cub.cuh>
+#include <cuda/std/array>
 #include <thrust/copy.h>
 #include <thrust/execution_policy.h>
 
@@ -243,9 +244,9 @@ enum row_entry_state_e {
  */
 static auto __device__ index_order_from_index_types(uint32_t index_types_bitmap)
 {
-  constexpr std::array full_order = {CI_PRESENT, CI_DATA, CI_DATA2};
+  constexpr cuda::std::array full_order = {CI_PRESENT, CI_DATA, CI_DATA2};
 
-  std::array<uint32_t, full_order.size()> partial_order;
+  cuda::std::array<uint32_t, full_order.size()> partial_order;
   thrust::copy_if(thrust::seq,
                   full_order.cbegin(),
                   full_order.cend(),

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -74,7 +74,7 @@ namespace cudf::io::orc::detail {
 namespace nvcomp = cudf::io::detail::nvcomp;
 
 template <typename T>
-[[nodiscard]] constexpr int varint_size(T val)
+[[nodiscard]] CUDF_HOST_DEVICE constexpr int varint_size(T val)
 {
   auto len = 1u;
   while (val > 0x7f) {