Fix sparse utilities
viclafargue committed Jan 14, 2025
1 parent 1b62c41 commit 9c85366
Showing 18 changed files with 94 additions and 94 deletions.
2 changes: 1 addition & 1 deletion cpp/include/raft/cluster/detail/connectivities.cuh
@@ -95,7 +95,7 @@ struct distance_graph_impl<raft::cluster::LinkageDistance::KNN_GRAPH, value_idx,
   });

   raft::sparse::convert::sorted_coo_to_csr(
-    knn_graph_coo.rows(), knn_graph_coo.nnz, indptr.data(), m + 1, stream);
+    knn_graph_coo.rows(), (value_idx)knn_graph_coo.nnz, indptr.data(), m + 1, stream);

   // TODO: Wouldn't need to copy here if we could compute knn
   // graph directly on the device uvectors
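Note on the new casts at call sites like this one: `COO::nnz` is widened to `uint64_t` in this commit (see `coo.cuh` below), while `sorted_coo_to_csr` now deduces its offset type `outT` from both the `nnz` argument and the `row_ind` pointer. A minimal sketch of the deduction conflict the cast avoids, using toy declarations rather than the real headers:

```cpp
#include <cstdint>
#include <cuda_runtime.h>

// Toy stand-in for the signature this commit introduces; the real
// implementation (detail/csr.cuh) builds the CSR indptr on the GPU.
template <typename T, typename outT>
void sorted_coo_to_csr(const T* rows, outT nnz, outT* row_ind, int m, cudaStream_t stream)
{
  (void)rows; (void)nnz; (void)row_ind; (void)m; (void)stream;
}

void call_site(const int* rows, uint64_t nnz, int* indptr, int m, cudaStream_t stream)
{
  // sorted_coo_to_csr(rows, nnz, indptr, m + 1, stream);
  //   ^ would not compile: outT deduces as uint64_t from `nnz`
  //     but as int from `indptr`
  sorted_coo_to_csr(rows, (int)nnz, indptr, m + 1, stream);  // OK: outT = int
}
```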
4 changes: 2 additions & 2 deletions cpp/include/raft/cluster/detail/mst.cuh
@@ -98,7 +98,7 @@ void connect_knn_graph(

   rmm::device_uvector<value_idx> indptr2(m + 1, stream);
   raft::sparse::convert::sorted_coo_to_csr(
-    connected_edges.rows(), connected_edges.nnz, indptr2.data(), m + 1, stream);
+    connected_edges.rows(), (value_idx)connected_edges.nnz, indptr2.data(), m + 1, stream);

   // On the second call, we hand the MST the original colors
   // and the new set of edges and let it restart the optimization process
@@ -204,4 +204,4 @@ void build_sorted_mst(
   raft::copy_async(mst_weight, mst_coo.weights.data(), mst_coo.n_edges, stream);
 }

-};  // namespace raft::cluster::detail
\ No newline at end of file
+};  // namespace raft::cluster::detail
10 changes: 5 additions & 5 deletions cpp/include/raft/sparse/convert/csr.cuh
@@ -52,8 +52,8 @@ void coo_to_csr(raft::resources const& handle,
  * @param m: number of rows in dense matrix
  * @param stream: cuda stream to use
  */
-template <typename T>
-void sorted_coo_to_csr(const T* rows, int nnz, T* row_ind, int m, cudaStream_t stream)
+template <typename T, typename outT>
+void sorted_coo_to_csr(const T* rows, outT nnz, outT* row_ind, int m, cudaStream_t stream)
 {
   detail::sorted_coo_to_csr(rows, nnz, row_ind, m, stream);
 }
@@ -65,10 +65,10 @@ void sorted_coo_to_csr(const T* rows, int nnz, T* row_ind, int m, cudaStream_t s
  * @param row_ind: output row indices array
  * @param stream: cuda stream to use
  */
-template <typename T>
-void sorted_coo_to_csr(COO<T>* coo, int* row_ind, cudaStream_t stream)
+template <typename T, typename outT>
+void sorted_coo_to_csr(COO<T>* coo, outT* row_ind, cudaStream_t stream)
 {
-  detail::sorted_coo_to_csr(coo->rows(), coo->nnz, row_ind, coo->n_rows, stream);
+  detail::sorted_coo_to_csr(coo->rows(), (outT)coo->nnz, row_ind, coo->n_rows, stream);
 }

 /**
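For orientation, a minimal usage sketch of the retemplated first overload (buffer names are hypothetical; it mirrors the call sites in `connectivities.cuh` and `mst.cuh` above, which pass `m + 1` so the output holds `m + 1` offsets):

```cpp
#include <raft/sparse/convert/csr.cuh>
#include <rmm/device_uvector.hpp>

// rows: one (sorted) row id per nonzero; indptr: m + 1 CSR offsets.
// T = int is deduced from rows, outT = uint64_t from nnz and indptr.
void build_indptr(const int* rows,
                  uint64_t nnz,
                  rmm::device_uvector<uint64_t>& indptr,  // sized m + 1
                  int m,
                  cudaStream_t stream)
{
  raft::sparse::convert::sorted_coo_to_csr(rows, nnz, indptr.data(), m + 1, stream);
}
```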
8 changes: 4 additions & 4 deletions cpp/include/raft/sparse/convert/detail/csr.cuh
@@ -84,18 +84,18 @@ void coo_to_csr(raft::resources const& handle,
  * @param m: number of rows in dense matrix
  * @param stream: cuda stream to use
  */
-template <typename T>
-void sorted_coo_to_csr(const T* rows, int nnz, T* row_ind, int m, cudaStream_t stream)
+template <typename T, typename outT>
+void sorted_coo_to_csr(const T* rows, outT nnz, outT* row_ind, int m, cudaStream_t stream)
 {
   rmm::device_uvector<T> row_counts(m, stream);

-  RAFT_CUDA_TRY(cudaMemsetAsync(row_counts.data(), 0, m * sizeof(T), stream));
+  RAFT_CUDA_TRY(cudaMemsetAsync(row_counts.data(), 0, (uint64_t)m * sizeof(T), stream));

   linalg::coo_degree(rows, nnz, row_counts.data(), stream);

   // create csr compressed row index from row counts
   thrust::device_ptr<T> row_counts_d = thrust::device_pointer_cast(row_counts.data());
-  thrust::device_ptr<T> c_ind_d      = thrust::device_pointer_cast(row_ind);
+  thrust::device_ptr<outT> c_ind_d   = thrust::device_pointer_cast(row_ind);
   exclusive_scan(rmm::exec_policy(stream), row_counts_d, row_counts_d + m, c_ind_d);
 }
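The routine above is a two-step construction: count each row's nonzeros (`coo_degree`), then exclusive-scan the counts into offsets. A host-side analogue for illustration only (standard C++17, not part of RAFT):

```cpp
#include <cstdint>
#include <numeric>
#include <vector>

// Host analogue of sorted_coo_to_csr: per-row degree count + exclusive scan.
std::vector<uint64_t> coo_rows_to_indptr(const std::vector<int>& rows, int m)
{
  std::vector<uint64_t> counts(m, 0);
  for (int r : rows) counts[r]++;  // degree of each row (coo_degree)
  std::vector<uint64_t> indptr(m);
  std::exclusive_scan(counts.begin(), counts.end(), indptr.begin(), uint64_t{0});
  return indptr;  // rows = {0,0,1,3}, m = 4  ->  indptr = {0,2,3,3}
}
```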
15 changes: 7 additions & 8 deletions cpp/include/raft/sparse/detail/coo.cuh
@@ -52,7 +52,7 @@ class COO {
   rmm::device_uvector<T> vals_arr;

  public:
-  Index_Type nnz;
+  uint64_t nnz;
   Index_Type n_rows;
   Index_Type n_cols;
@@ -75,7 +75,7 @@
   COO(rmm::device_uvector<Index_Type>& rows,
       rmm::device_uvector<Index_Type>& cols,
       rmm::device_uvector<T>& vals,
-      Index_Type nnz,
+      uint64_t nnz,
       Index_Type n_rows = 0,
       Index_Type n_cols = 0)
     : rows_arr(rows), cols_arr(cols), vals_arr(vals), nnz(nnz), n_rows(n_rows), n_cols(n_cols)
@@ -90,7 +90,7 @@
    * @param init: initialize arrays with zeros
    */
   COO(cudaStream_t stream,
-      Index_Type nnz,
+      uint64_t nnz,
       Index_Type n_rows = 0,
       Index_Type n_cols = 0,
       bool init = true)
@@ -121,7 +121,7 @@
    */
   bool validate_size() const
   {
-    if (this->nnz < 0 || n_rows < 0 || n_cols < 0) return false;
+    if (this->nnz <= 0 || n_rows <= 0 || n_cols <= 0) return false;
     return true;
   }
@@ -204,7 +204,7 @@
    * @param init: should values be initialized to 0?
    * @param stream: CUDA stream to use
    */
-  void allocate(Index_Type nnz, bool init, cudaStream_t stream)
+  void allocate(uint64_t nnz, bool init, cudaStream_t stream)
   {
     this->allocate(nnz, 0, init, stream);
   }
@@ -216,7 +216,7 @@
    * @param init: should values be initialized to 0?
    * @param stream: CUDA stream to use
    */
-  void allocate(Index_Type nnz, Index_Type size, bool init, cudaStream_t stream)
+  void allocate(uint64_t nnz, Index_Type size, bool init, cudaStream_t stream)
   {
     this->allocate(nnz, size, size, init, stream);
   }
@@ -229,8 +229,7 @@
    * @param init: should values be initialized to 0?
    * @param stream: stream to use for init
    */
-  void allocate(
-    Index_Type nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream)
+  void allocate(uint64_t nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream)
   {
     this->n_rows = n_rows;
     this->n_cols = n_cols;
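Two things are worth noting in this file. First, `nnz` and the `allocate` overloads now take `uint64_t`, so a COO can describe more than `INT_MAX` nonzeros while its row and column indices stay 32-bit. Second, since an unsigned `nnz` can never be negative, `validate_size` was tightened from `< 0` to `<= 0`, which now also reports zero-sized COOs as invalid. A construction sketch with illustrative sizes (it assumes `raft/sparse/coo.hpp` is the public header wrapping this detail file):

```cpp
#include <raft/sparse/coo.hpp>

void big_coo(cudaStream_t stream)
{
  // More nonzeros than a 32-bit int can count, with 32-bit indices.
  // Purely illustrative: this allocates roughly 24 GB of device buffers.
  uint64_t nnz = (uint64_t{1} << 31) + 1;
  raft::sparse::COO<float> coo(stream, nnz, /*n_rows=*/1 << 20, /*n_cols=*/1 << 20);
  // coo.nnz is uint64_t; coo.rows() and coo.cols() remain int*.
}
```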
6 changes: 3 additions & 3 deletions cpp/include/raft/sparse/detail/utils.h
@@ -103,10 +103,10 @@ void iota_fill(value_idx* indices, value_idx nrows, value_idx ncols, cudaStream_
   iota_fill_block_kernel<<<nrows, blockdim, 0, stream>>>(indices, ncols);
 }

-template <typename T>
-__device__ int get_stop_idx(T row, T m, T nnz, const T* ind)
+template <typename T, typename indT>
+__device__ indT get_stop_idx(T row, T m, indT nnz, const indT* ind)
 {
-  int stop_idx = 0;
+  indT stop_idx = 0;
   if (row < (m - 1))
     stop_idx = ind[row + 1];
   else
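A host-side restatement of this helper, for illustration (the real one is `__device__`): the end offset of row `row` is the next row's start, except for the last row, which is closed by `nnz` itself.

```cpp
#include <cstdint>

template <typename T, typename indT>
indT get_stop_idx_host(T row, T m, indT nnz, const indT* ind)
{
  return (row < m - 1) ? ind[row + 1] : nnz;
}

// ind = {0, 2, 3, 3}, m = 4, nnz = 6:
//   row 1 spans [ind[1], ind[2]) = [2, 3)
//   row 3 spans [ind[3], nnz)    = [3, 6)
```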
6 changes: 3 additions & 3 deletions cpp/include/raft/sparse/linalg/degree.cuh
@@ -34,7 +34,7 @@ namespace linalg {
  * @param stream: cuda stream to use
  */
 template <typename T = int>
-void coo_degree(const T* rows, int nnz, T* results, cudaStream_t stream)
+void coo_degree(const T* rows, uint64_t nnz, T* results, cudaStream_t stream)
 {
   detail::coo_degree<64, T>(rows, nnz, results, stream);
 }
@@ -66,7 +66,7 @@ void coo_degree(COO<T>* in, int* results, cudaStream_t stream)
  */
 template <typename T>
 void coo_degree_scalar(
-  const int* rows, const T* vals, int nnz, T scalar, int* results, cudaStream_t stream = 0)
+  const int* rows, const T* vals, uint64_t nnz, T scalar, int* results, cudaStream_t stream = 0)
 {
   detail::coo_degree_scalar<64>(rows, vals, nnz, scalar, results, stream);
 }
@@ -120,4 +120,4 @@ void coo_degree_nz(COO<T>* in, int* results, cudaStream_t stream)
 };  // end NAMESPACE sparse
 };  // end NAMESPACE raft

-#endif
\ No newline at end of file
+#endif
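A short usage sketch of the widened public API (buffer names are hypothetical; `results` must be zero-initialized, since the underlying kernel accumulates with atomics):

```cpp
#include <raft/sparse/linalg/degree.cuh>

// Count nonzeros per row of a COO matrix whose nnz may exceed INT_MAX.
void row_degrees(const int* rows, uint64_t nnz, int* results, cudaStream_t stream)
{
  raft::sparse::linalg::coo_degree(rows, nnz, results, stream);
}
```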
20 changes: 10 additions & 10 deletions cpp/include/raft/sparse/linalg/detail/degree.cuh
@@ -39,10 +39,10 @@ namespace detail {
  * @param nnz the size of the rows array
  * @param results array to place results
  */
-template <int TPB_X = 64, typename T = int>
-RAFT_KERNEL coo_degree_kernel(const T* rows, int nnz, T* results)
+template <uint64_t TPB_X = 64, typename T = int>
+RAFT_KERNEL coo_degree_kernel(const T* rows, uint64_t nnz, T* results)
 {
-  int row = (blockIdx.x * TPB_X) + threadIdx.x;
+  uint64_t row = (blockIdx.x * TPB_X) + threadIdx.x;
   if (row < nnz) { atomicAdd(results + rows[row], (T)1); }
 }
@@ -54,8 +54,8 @@ RAFT_KERNEL coo_degree_kernel(const T* rows, int nnz, T* results)
  * @param results: output result array
  * @param stream: cuda stream to use
  */
-template <int TPB_X = 64, typename T = int>
-void coo_degree(const T* rows, int nnz, T* results, cudaStream_t stream)
+template <uint64_t TPB_X = 64, typename T = int>
+void coo_degree(const T* rows, uint64_t nnz, T* results, cudaStream_t stream)
 {
   dim3 grid_rc(raft::ceildiv(nnz, TPB_X), 1, 1);
   dim3 blk_rc(TPB_X, 1, 1);
@@ -71,11 +71,11 @@ RAFT_KERNEL coo_degree_nz_kernel(const int* rows, const T* vals, int nnz, int* r
   if (row < nnz && vals[row] != 0.0) { raft::myAtomicAdd(results + rows[row], 1); }
 }

-template <int TPB_X = 64, typename T>
+template <uint64_t TPB_X = 64, typename T>
 RAFT_KERNEL coo_degree_scalar_kernel(
-  const int* rows, const T* vals, int nnz, T scalar, int* results)
+  const int* rows, const T* vals, uint64_t nnz, T scalar, int* results)
 {
-  int row = (blockIdx.x * TPB_X) + threadIdx.x;
+  uint64_t row = (blockIdx.x * TPB_X) + threadIdx.x;
   if (row < nnz && vals[row] != scalar) { raft::myAtomicAdd(results + rows[row], 1); }
 }
@@ -90,9 +90,9 @@ RAFT_KERNEL coo_degree_scalar_kernel(
  * @param results: output row counts
  * @param stream: cuda stream to use
  */
-template <int TPB_X = 64, typename T>
+template <uint64_t TPB_X = 64, typename T>
 void coo_degree_scalar(
-  const int* rows, const T* vals, int nnz, T scalar, int* results, cudaStream_t stream = 0)
+  const int* rows, const T* vals, uint64_t nnz, T scalar, int* results, cudaStream_t stream = 0)
 {
   dim3 grid_rc(raft::ceildiv(nnz, TPB_X), 1, 1);
   dim3 blk_rc(TPB_X, 1, 1);
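The thread-block template parameter `TPB_X` is widened alongside `nnz` because `raft::ceildiv` is, to my reading, templated on a single integer type, so both of its arguments must agree; mixing a `uint64_t` nnz with an `int` TPB_X would no longer deduce. The grid arithmetic itself, as a plain host sketch:

```cpp
#include <cstdint>

// Equivalent of raft::ceildiv(nnz, TPB_X) in the launch configuration above.
constexpr uint64_t ceildiv_u64(uint64_t a, uint64_t b) { return (a + b - 1) / b; }

// e.g. nnz = 3'000'000'000 entries at TPB_X = 64 threads per block:
static_assert(ceildiv_u64(3'000'000'000, 64) == 46'875'000, "blocks in grid");
```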
26 changes: 13 additions & 13 deletions cpp/include/raft/sparse/linalg/detail/norm.cuh
@@ -40,15 +40,15 @@ namespace sparse {
 namespace linalg {
 namespace detail {

-template <int TPB_X = 64, typename T>
+template <int TPB_X = 64, typename T, typename indT>
 RAFT_KERNEL csr_row_normalize_l1_kernel(
   // @TODO: This can be done much more parallel by
   // having threads in a warp compute the sum in parallel
   // over each row and then divide the values in parallel.
-  const int* ia,  // csr row ex_scan (sorted by row)
+  const indT* ia,  // csr row ex_scan (sorted by row)
   const T* vals,
-  int nnz,  // array of values and number of non-zeros
-  int m,    // num rows in csr
+  indT nnz,  // array of values and number of non-zeros
+  int m,     // num rows in csr
   T* result)
 {  // output array
@@ -57,19 +57,19 @@ RAFT_KERNEL csr_row_normalize_l1_kernel(

   // sum all vals_arr for row and divide each val by sum
   if (row < m) {
-    int start_idx = ia[row];
-    int stop_idx  = 0;
+    indT start_idx = ia[row];
+    indT stop_idx  = 0;
     if (row < m - 1) {
       stop_idx = ia[row + 1];
     } else
       stop_idx = nnz;

     T sum = T(0.0);
-    for (int j = start_idx; j < stop_idx; j++) {
+    for (indT j = start_idx; j < stop_idx; j++) {
       sum = sum + fabs(vals[j]);
     }

-    for (int j = start_idx; j < stop_idx; j++) {
+    for (indT j = start_idx; j < stop_idx; j++) {
       if (sum != 0.0) {
         T val     = vals[j];
         result[j] = val / sum;
@@ -90,11 +90,11 @@ RAFT_KERNEL csr_row_normalize_l1_kernel(
  * @param result: l1 normalized data array
  * @param stream: cuda stream to use
  */
-template <int TPB_X = 64, typename T>
-void csr_row_normalize_l1(const int* ia,  // csr row ex_scan (sorted by row)
+template <int TPB_X = 64, typename T, typename indT>
+void csr_row_normalize_l1(const indT* ia,  // csr row ex_scan (sorted by row)
                           const T* vals,
-                          int nnz,  // array of values and number of non-zeros
-                          int m,    // num rows in csr
+                          indT nnz,  // array of values and number of non-zeros
+                          int m,     // num rows in csr
                           T* result,
                           cudaStream_t stream)
 {  // output array
@@ -232,4 +232,4 @@ void rowNormCsrCaller(const IdxType* ia,
 };  // end NAMESPACE detail
 };  // end NAMESPACE linalg
 };  // end NAMESPACE sparse
-};  // end NAMESPACE raft
\ No newline at end of file
+};  // end NAMESPACE raft
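A worked example of what the L1 kernel computes, on a 2 x 3 CSR matrix with three stored values:

```cpp
// ia   = {0, 2}               // row offsets (m = 2); nnz = 3 closes row 1
// vals = {1.0f, -3.0f, 2.0f}
//
// row 0: sum = |1| + |-3| = 4  ->  result[0] = 0.25, result[1] = -0.75
// row 1: sum = |2|        = 2  ->  result[2] = 1.0
```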
22 changes: 11 additions & 11 deletions cpp/include/raft/sparse/linalg/detail/symmetrize.cuh
@@ -48,39 +48,39 @@ namespace detail {
 // TODO: value_idx param needs to be used for this once FAISS is updated to use float32
 // for indices so that the index types can be uniform
 template <int TPB_X = 128, typename T, typename Lambda>
-RAFT_KERNEL coo_symmetrize_kernel(int* row_ind,
+RAFT_KERNEL coo_symmetrize_kernel(uint64_t* row_ind,
                                   int* rows,
                                   int* cols,
                                   T* vals,
                                   int* orows,
                                   int* ocols,
                                   T* ovals,
                                   int n,
-                                  int cnnz,
+                                  uint64_t cnnz,
                                   Lambda reduction_op)
 {
   int row = (blockIdx.x * TPB_X) + threadIdx.x;

   if (row < n) {
-    int start_idx = row_ind[row];  // each thread processes one row
-    int stop_idx  = get_stop_idx(row, n, cnnz, row_ind);
+    uint64_t start_idx = row_ind[row];  // each thread processes one row
+    uint64_t stop_idx  = get_stop_idx(row, n, cnnz, row_ind);

-    int row_nnz       = 0;
-    int out_start_idx = start_idx * 2;
+    int row_nnz            = 0;
+    uint64_t out_start_idx = start_idx * 2;

     for (int idx = 0; idx < stop_idx - start_idx; idx++) {
       int cur_row = rows[idx + start_idx];
       int cur_col = cols[idx + start_idx];
       T cur_val   = vals[idx + start_idx];

-      int lookup_row = cur_col;
-      int t_start    = row_ind[lookup_row];  // Start at
-      int t_stop     = get_stop_idx(lookup_row, n, cnnz, row_ind);
+      int lookup_row   = cur_col;
+      uint64_t t_start = row_ind[lookup_row];  // Start at
+      uint64_t t_stop  = get_stop_idx(lookup_row, n, cnnz, row_ind);

       T transpose = 0.0;

       bool found_match = false;
-      for (int t_idx = t_start; t_idx < t_stop; t_idx++) {
+      for (uint64_t t_idx = t_start; t_idx < t_stop; t_idx++) {
         // If we find a match, let's get out of the loop. We won't
         // need to modify the transposed value, since that will be
         // done in a different thread.
@@ -142,7 +142,7 @@ void coo_symmetrize(COO<T>* in,

   ASSERT(!out->validate_mem(), "Expecting unallocated COO for output");

-  rmm::device_uvector<int> in_row_ind(in->n_rows, stream);
+  rmm::device_uvector<uint64_t> in_row_ind(in->n_rows, stream);

   convert::sorted_coo_to_csr(in, in_row_ind.data(), stream);
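The widening matters here in particular because the symmetrized output interleaves up to two entries per input entry, so `out_start_idx = start_idx * 2` can exceed 32 bits even when `start_idx` itself still fits. A numeric sketch:

```cpp
// start_idx     = 1'500'000'000   // fits in int32
// start_idx * 2 = 3'000'000'000   // overflows int32 (undefined behavior
//                                 // if computed in int); exact as uint64_t
```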
10 changes: 5 additions & 5 deletions cpp/include/raft/sparse/linalg/norm.cuh
@@ -36,11 +36,11 @@ namespace linalg {
  * @param result: l1 normalized data array
  * @param stream: cuda stream to use
  */
-template <typename T>
-void csr_row_normalize_l1(const int* ia,  // csr row ex_scan (sorted by row)
+template <typename T, typename indT>
+void csr_row_normalize_l1(const indT* ia,  // csr row ex_scan (sorted by row)
                           const T* vals,
-                          int nnz,  // array of values and number of non-zeros
-                          int m,    // num rows in csr
+                          indT nnz,  // array of values and number of non-zeros
+                          int m,     // num rows in csr
                           T* result,
                           cudaStream_t stream)
 {  // output array
@@ -104,4 +104,4 @@ void rowNormCsr(raft::resources const& handle,
 };  // end NAMESPACE sparse
 };  // end NAMESPACE raft

-#endif
\ No newline at end of file
+#endif
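A usage sketch of the retemplated public wrapper (names are hypothetical): with `indT` split out from the value type, the row offsets and `nnz` may now be 64-bit while the values stay `float`.

```cpp
#include <raft/sparse/linalg/norm.cuh>

// ia: m CSR row offsets (exclusive scan of row counts); nnz closes the
// last row; result receives the L1-normalized copy of vals.
void normalize_rows(const uint64_t* ia, const float* vals, uint64_t nnz,
                    int m, float* result, cudaStream_t stream)
{
  raft::sparse::linalg::csr_row_normalize_l1(ia, vals, nnz, m, result, stream);
}
```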
@@ -242,7 +242,7 @@ void perform_1nn(raft::resources const& handle,
   // the color components.
   auto colors_group_idxs = raft::make_device_vector<value_idx, value_idx>(handle, n_components + 1);
   raft::sparse::convert::sorted_coo_to_csr(
-    colors, n_rows, colors_group_idxs.data_handle(), n_components + 1, stream);
+    colors, (value_idx)n_rows, colors_group_idxs.data_handle(), n_components + 1, stream);

   auto group_idxs_view = raft::make_device_vector_view<const value_idx, value_idx>(
     colors_group_idxs.data_handle() + 1, n_components);