Skip to content

Commit

Permalink
Fix some type issues
Browse files (browse the repository at this point in the history)
  • Loading branch information
yhmtsai committed Jan 5, 2023
1 parent 99c8d73 commit b7d6aaa
Show file tree
Hide file tree
Showing 8 changed files with 27 additions and 21 deletions.
3 changes: 1 addition & 2 deletions common/cuda_hip/matrix/fbcsr_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,7 @@ void fallback_transpose(const std::shared_ptr<const DefaultExecutor> exec,
thrust::device_pointer_cast(out_row_idxs.get_data()),
thrust::device_pointer_cast(out_col_idxs),
thrust::device_pointer_cast(permutation.get_data())));
using tuple_type =
thrust::tuple<IndexType, IndexType, device_type<ValueType>>;
using tuple_type = thrust::tuple<IndexType, IndexType, IndexType>;
thrust::sort(thrust::device, zip_it, zip_it + nnzb,
[] __device__(const tuple_type& a, const tuple_type& b) {
return thrust::tie(thrust::get<0>(a), thrust::get<1>(a)) <
Expand Down
11 changes: 7 additions & 4 deletions common/unified/multigrid/pgm_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,9 @@ void find_strongest_neighbor(
exec,
[] GKO_KERNEL(auto row, auto row_ptrs, auto col_idxs, auto weight_vals,
auto diag, auto agg, auto strongest_neighbor) {
auto max_weight_unagg = zero<ValueType>();
auto max_weight_agg = zero<ValueType>();
using value_type = device_type<ValueType>;
auto max_weight_unagg = zero<value_type>();
auto max_weight_agg = zero<value_type>();
IndexType strongest_unagg = -1;
IndexType strongest_agg = -1;
if (agg[row] != -1) {
Expand Down Expand Up @@ -267,7 +268,8 @@ void assign_to_exist_agg(std::shared_ptr<const DefaultExecutor> exec,
if (agg_val[row] != -1) {
return;
}
ValueType max_weight_agg = zero<ValueType>();
using value_type = device_type<ValueType>;
value_type max_weight_agg = zero<value_type>();
IndexType strongest_agg = -1;
for (auto idx = row_ptrs[row]; idx < row_ptrs[row + 1]; idx++) {
auto col = col_idxs[idx];
Expand Down Expand Up @@ -304,7 +306,8 @@ void assign_to_exist_agg(std::shared_ptr<const DefaultExecutor> exec,
if (agg_val[row] != -1) {
return;
}
ValueType max_weight_agg = zero<ValueType>();
using value_type = device_type<ValueType>;
value_type max_weight_agg = zero<value_type>();
IndexType strongest_agg = -1;
for (auto idx = row_ptrs[row]; idx < row_ptrs[row + 1]; idx++) {
auto col = col_idxs[idx];
Expand Down
8 changes: 4 additions & 4 deletions cuda/factorization/par_ilut_filter_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ void threshold_filter(syn::value_list<int, subwarp_size>,
if (num_blocks > 0) {
kernel::threshold_filter_nnz<subwarp_size>
<<<num_blocks, default_block_size>>>(
old_row_ptrs, as_cuda_type(old_vals), num_rows, threshold,
new_row_ptrs, lower);
old_row_ptrs, as_cuda_type(old_vals), num_rows,
as_cuda_type(threshold), new_row_ptrs, lower);
}

// build row pointers
Expand Down Expand Up @@ -126,8 +126,8 @@ void threshold_filter(syn::value_list<int, subwarp_size>,
kernel::threshold_filter<subwarp_size>
<<<num_blocks, default_block_size>>>(
old_row_ptrs, old_col_idxs, as_cuda_type(old_vals), num_rows,
threshold, new_row_ptrs, new_row_idxs, new_col_idxs,
as_cuda_type(new_vals), lower);
as_cuda_type(threshold), new_row_ptrs, new_row_idxs,
new_col_idxs, as_cuda_type(new_vals), lower);
}
}

Expand Down
6 changes: 3 additions & 3 deletions cuda/factorization/par_ilut_select_common.cu
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,12 @@ void sampleselect_count(std::shared_ptr<const DefaultExecutor> exec,
static_cast<IndexType>(ceildiv(num_threads_total, default_block_size));
// pick sample, build searchtree
kernel::build_searchtree<<<1, bucket_count>>>(as_cuda_type(values), size,
tree);
as_cuda_type(tree));
// determine bucket sizes
if (num_blocks > 0) {
kernel::count_buckets<<<num_blocks, default_block_size>>>(
as_cuda_type(values), size, tree, partial_counts, oracles,
items_per_thread);
as_cuda_type(values), size, as_cuda_type(tree), partial_counts,
oracles, items_per_thread);
}
// compute prefix sum and total sum over block-local values
kernel::block_prefix_sum<<<bucket_count, default_block_size>>>(
Expand Down
4 changes: 2 additions & 2 deletions cuda/factorization/par_ilut_select_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ void sampleselect_filter(const ValueType* values, IndexType size,
static_cast<IndexType>(ceildiv(num_threads_total, default_block_size));
if (num_blocks > 0) {
kernel::filter_bucket<<<num_blocks, default_block_size>>>(
as_cuda_type(values), size, bucket, oracles, partial_counts, out,
items_per_thread);
as_cuda_type(values), size, bucket, oracles, partial_counts,
as_cuda_type(out), items_per_thread);
}
}

Expand Down
8 changes: 5 additions & 3 deletions hip/factorization/par_ilut_filter_kernel.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ void threshold_filter(syn::value_list<int, subwarp_size>,
hipLaunchKernelGGL(
HIP_KERNEL_NAME(kernel::threshold_filter_nnz<subwarp_size>),
num_blocks, default_block_size, 0, 0, old_row_ptrs,
as_hip_type(old_vals), num_rows, threshold, new_row_ptrs, lower);
as_hip_type(old_vals), num_rows, as_hip_type(threshold),
new_row_ptrs, lower);
}

// build row pointers
Expand Down Expand Up @@ -129,8 +130,9 @@ void threshold_filter(syn::value_list<int, subwarp_size>,
hipLaunchKernelGGL(
HIP_KERNEL_NAME(kernel::threshold_filter<subwarp_size>), num_blocks,
default_block_size, 0, 0, old_row_ptrs, old_col_idxs,
as_hip_type(old_vals), num_rows, threshold, new_row_ptrs,
new_row_idxs, new_col_idxs, as_hip_type(new_vals), lower);
as_hip_type(old_vals), num_rows, as_hip_type(threshold),
new_row_ptrs, new_row_idxs, new_col_idxs, as_hip_type(new_vals),
lower);
}
}

Expand Down
6 changes: 4 additions & 2 deletions hip/factorization/par_ilut_select_common.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,14 @@ void sampleselect_count(std::shared_ptr<const DefaultExecutor> exec,
static_cast<IndexType>(ceildiv(num_threads_total, default_block_size));
// pick sample, build searchtree
hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel::build_searchtree), 1,
bucket_count, 0, 0, as_hip_type(values), size, tree);
bucket_count, 0, 0, as_hip_type(values), size,
as_hip_type(tree));
// determine bucket sizes
if (num_blocks > 0) {
hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel::count_buckets), num_blocks,
default_block_size, 0, 0, as_hip_type(values), size,
tree, partial_counts, oracles, items_per_thread);
as_hip_type(tree), partial_counts, oracles,
items_per_thread);
}
// compute prefix sum and total sum over block-local values
hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel::block_prefix_sum), bucket_count,
Expand Down
2 changes: 1 addition & 1 deletion hip/factorization/par_ilut_select_kernel.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ void sampleselect_filter(const ValueType* values, IndexType size,
if (num_blocks > 0) {
hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel::filter_bucket), num_blocks,
default_block_size, 0, 0, as_hip_type(values), size,
bucket, oracles, partial_counts, out,
bucket, oracles, partial_counts, as_hip_type(out),
items_per_thread);
}
}
Expand Down

0 comments on commit b7d6aaa

Please sign in to comment.