Skip to content

Commit

Permalink
delete some redundent as_device_type
Browse files Browse the repository at this point in the history
Co-authored-by: Pratik Nayak <[email protected]>
  • Loading branch information
yhmtsai and pratikvn committed Feb 9, 2023
1 parent 9de37fa commit d7b41c7
Show file tree
Hide file tree
Showing 8 changed files with 45 additions and 55 deletions.
4 changes: 2 additions & 2 deletions common/unified/multigrid/pgm_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ void assign_to_exist_agg(std::shared_ptr<const DefaultExecutor> exec,
return;
}
using value_type = device_type<ValueType>;
value_type max_weight_agg = zero<value_type>();
auto max_weight_agg = zero<value_type>();
IndexType strongest_agg = -1;
for (auto idx = row_ptrs[row]; idx < row_ptrs[row + 1]; idx++) {
auto col = col_idxs[idx];
Expand Down Expand Up @@ -307,7 +307,7 @@ void assign_to_exist_agg(std::shared_ptr<const DefaultExecutor> exec,
return;
}
using value_type = device_type<ValueType>;
value_type max_weight_agg = zero<value_type>();
auto max_weight_agg = zero<value_type>();
IndexType strongest_agg = -1;
for (auto idx = row_ptrs[row]; idx < row_ptrs[row + 1]; idx++) {
auto col = col_idxs[idx];
Expand Down
12 changes: 4 additions & 8 deletions cuda/matrix/csr_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -298,10 +298,8 @@ void load_balance_spmv(std::shared_ptr<const CudaExecutor> exec,
a->get_const_col_idxs(),
as_cuda_type(a->get_const_row_ptrs()),
as_cuda_type(a->get_const_srow()),
as_cuda_type(b->get_const_values()),
as_cuda_type(b->get_stride()),
as_cuda_type(c->get_values()),
as_cuda_type(c->get_stride()));
as_cuda_type(b->get_const_values()), b->get_stride(),
as_cuda_type(c->get_values()), c->get_stride());
}
} else {
if (csr_grid.x > 0 && csr_grid.y > 0) {
Expand All @@ -311,10 +309,8 @@ void load_balance_spmv(std::shared_ptr<const CudaExecutor> exec,
a->get_const_col_idxs(),
as_cuda_type(a->get_const_row_ptrs()),
as_cuda_type(a->get_const_srow()),
as_cuda_type(b->get_const_values()),
as_cuda_type(b->get_stride()),
as_cuda_type(c->get_values()),
as_cuda_type(c->get_stride()));
as_cuda_type(b->get_const_values()), b->get_stride(),
as_cuda_type(c->get_values()), c->get_stride());
}
}
}
Expand Down
7 changes: 3 additions & 4 deletions cuda/solver/cb_gmres_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ void finish_arnoldi_CGS(std::shared_ptr<const CudaExecutor> exec,
stride_arnoldi, as_cuda_type(hessenberg_iter->get_values()),
stride_hessenberg, iter + 1, acc::as_cuda_range(krylov_bases),
as_cuda_type(stop_status), as_cuda_type(reorth_status),
as_cuda_type(num_reorth->get_data()));
num_reorth->get_data());
num_reorth_host = exec->copy_val_to_host(num_reorth->get_const_data());
}

Expand Down Expand Up @@ -388,8 +388,7 @@ void givens_rotation(std::shared_ptr<const CudaExecutor> exec,
givens_sin->get_stride(), as_cuda_type(givens_cos->get_values()),
givens_cos->get_stride(), as_cuda_type(residual_norm->get_values()),
as_cuda_type(residual_norm_collection->get_values()),
residual_norm_collection->get_stride(),
as_cuda_type(stop_status->get_const_data()));
residual_norm_collection->get_stride(), stop_status->get_const_data());
}


Expand All @@ -412,7 +411,7 @@ void arnoldi(std::shared_ptr<const CudaExecutor> exec,
static_cast<unsigned int>(
ceildiv(final_iter_nums->get_num_elems(), default_block_size)),
default_block_size>>>(as_cuda_type(final_iter_nums->get_data()),
as_cuda_type(stop_status->get_const_data()),
stop_status->get_const_data(),
final_iter_nums->get_num_elems());
finish_arnoldi_CGS(exec, next_krylov_basis, krylov_bases, hessenberg_iter,
buffer_iter, arnoldi_norm, iter,
Expand Down
19 changes: 9 additions & 10 deletions cuda/solver/idr_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ void solve_lower_triangular(const size_type nrhs,
subspace_dim, nrhs, as_cuda_type(m->get_const_values()),
m->get_stride(), as_cuda_type(f->get_const_values()), f->get_stride(),
as_cuda_type(c->get_values()), c->get_stride(),
as_cuda_type(stop_status->get_const_data()));
stop_status->get_const_data());
}


Expand Down Expand Up @@ -161,7 +161,7 @@ void update_g_and_u(std::shared_ptr<const CudaExecutor> exec,
multidot_kernel<<<grid_dim, block_dim>>>(
size, nrhs, as_cuda_type(p_i), as_cuda_type(g_k->get_values()),
g_k->get_stride(), as_cuda_type(alpha->get_values()),
as_cuda_type(stop_status->get_const_data()));
stop_status->get_const_data());
} else {
cublas::dot(exec->get_cublas_handle(), size, p_i, 1,
g_k->get_values(), g_k->get_stride(),
Expand All @@ -175,14 +175,14 @@ void update_g_and_u(std::shared_ptr<const CudaExecutor> exec,
as_cuda_type(g->get_const_values()), g->get_stride(),
as_cuda_type(g_k->get_values()), g_k->get_stride(),
as_cuda_type(u->get_values()), u->get_stride(),
as_cuda_type(stop_status->get_const_data()));
stop_status->get_const_data());
}
update_g_kernel<default_block_size>
<<<ceildiv(size * g_k->get_stride(), default_block_size),
default_block_size>>>(
k, size, nrhs, as_cuda_type(g_k->get_const_values()),
g_k->get_stride(), as_cuda_type(g->get_values()), g->get_stride(),
as_cuda_type(stop_status->get_const_data()));
stop_status->get_const_data());
}
Expand Down Expand Up @@ -212,7 +212,7 @@ void update_m(std::shared_ptr<const CudaExecutor> exec, const size_type nrhs,
multidot_kernel<<<grid_dim, block_dim>>>(
size, nrhs, as_cuda_type(p_i),
as_cuda_type(g_k->get_const_values()), g_k->get_stride(),
as_cuda_type(m_i), as_cuda_type(stop_status->get_const_data()));
as_cuda_type(m_i), stop_status->get_const_data());
} else {
cublas::dot(exec->get_cublas_handle(), size, p_i, 1,
g_k->get_const_values(), g_k->get_stride(), m_i);
Expand Down Expand Up @@ -242,7 +242,7 @@ void update_x_r_and_f(std::shared_ptr<const CudaExecutor> exec,
as_cuda_type(f->get_values()), f->get_stride(),
as_cuda_type(r->get_values()), r->get_stride(),
as_cuda_type(x->get_values()), x->get_stride(),
as_cuda_type(stop_status->get_const_data()));
stop_status->get_const_data());
components::fill_array(exec, f->get_values() + k * f->get_stride(), nrhs,
zero<ValueType>());
}
Expand Down Expand Up @@ -286,7 +286,7 @@ void step_1(std::shared_ptr<const CudaExecutor> exec, const size_type nrhs,
as_cuda_type(c->get_const_values()), c->get_stride(),
as_cuda_type(g->get_const_values()), g->get_stride(),
as_cuda_type(v->get_values()), v->get_stride(),
as_cuda_type(stop_status->get_const_data()));
stop_status->get_const_data());
}
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL);
Expand All @@ -313,7 +313,7 @@ void step_2(std::shared_ptr<const CudaExecutor> exec, const size_type nrhs,
preconditioned_vector->get_stride(),
as_cuda_type(c->get_const_values()), c->get_stride(),
as_cuda_type(u->get_values()), u->get_stride(),
as_cuda_type(stop_status->get_const_data()));
stop_status->get_const_data());
}
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL);
Expand Down Expand Up @@ -347,8 +347,7 @@ void compute_omega(
compute_omega_kernel<<<grid_dim, config::warp_size>>>(
nrhs, as_cuda_type(kappa), as_cuda_type(tht->get_const_values()),
as_cuda_type(residual_norm->get_const_values()),
as_cuda_type(omega->get_values()),
as_cuda_type(stop_status->get_const_data()));
as_cuda_type(omega->get_values()), stop_status->get_const_data());
}
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_COMPUTE_OMEGA_KERNEL);
Expand Down
10 changes: 4 additions & 6 deletions hip/matrix/csr_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,9 +303,8 @@ void spmv(std::shared_ptr<const HipExecutor> exec,
as_hip_type(a->get_const_values()), a->get_const_col_idxs(),
as_hip_type(a->get_const_row_ptrs()),
as_hip_type(a->get_const_srow()),
as_hip_type(b->get_const_values()),
as_hip_type(b->get_stride()), as_hip_type(c->get_values()),
as_hip_type(c->get_stride()));
as_hip_type(b->get_const_values()), b->get_stride(),
as_hip_type(c->get_values()), c->get_stride());
}
} else if (a->get_strategy()->get_name() == "merge_path") {
int items_per_thread =
Expand Down Expand Up @@ -397,9 +396,8 @@ void advanced_spmv(std::shared_ptr<const HipExecutor> exec,
as_hip_type(a->get_const_values()), a->get_const_col_idxs(),
as_hip_type(a->get_const_row_ptrs()),
as_hip_type(a->get_const_srow()),
as_hip_type(b->get_const_values()),
as_hip_type(b->get_stride()), as_hip_type(c->get_values()),
as_hip_type(c->get_stride()));
as_hip_type(b->get_const_values()), b->get_stride(),
as_hip_type(c->get_values()), c->get_stride());
}
} else if (a->get_strategy()->get_name() == "merge_path") {
int items_per_thread =
Expand Down
8 changes: 3 additions & 5 deletions hip/solver/cb_gmres_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ void finish_arnoldi_CGS(std::shared_ptr<const HipExecutor> exec,
stride_arnoldi, as_hip_type(hessenberg_iter->get_values()),
stride_hessenberg, iter + 1, acc::as_hip_range(krylov_bases),
as_hip_type(stop_status), as_hip_type(reorth_status),
as_hip_type(num_reorth->get_data()));
num_reorth->get_data());
num_reorth_host = exec->copy_val_to_host(num_reorth->get_const_data());
// num_reorth_host := number of next_krylov vector to be
// reorthogonalization
Expand Down Expand Up @@ -401,8 +401,7 @@ void givens_rotation(std::shared_ptr<const HipExecutor> exec,
givens_sin->get_stride(), as_hip_type(givens_cos->get_values()),
givens_cos->get_stride(), as_hip_type(residual_norm->get_values()),
as_hip_type(residual_norm_collection->get_values()),
residual_norm_collection->get_stride(),
as_hip_type(stop_status->get_const_data()));
residual_norm_collection->get_stride(), stop_status->get_const_data());
}


Expand All @@ -426,8 +425,7 @@ void arnoldi(std::shared_ptr<const HipExecutor> exec,
static_cast<unsigned int>(
ceildiv(final_iter_nums->get_num_elems(), default_block_size)),
default_block_size, 0, 0, as_hip_type(final_iter_nums->get_data()),
as_hip_type(stop_status->get_const_data()),
final_iter_nums->get_num_elems());
stop_status->get_const_data(), final_iter_nums->get_num_elems());
finish_arnoldi_CGS(exec, next_krylov_basis, krylov_bases, hessenberg_iter,
buffer_iter, arnoldi_norm, iter,
stop_status->get_const_data(), reorth_status->get_data(),
Expand Down
27 changes: 13 additions & 14 deletions hip/solver/idr_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ void solve_lower_triangular(const size_type nrhs,
as_hip_type(m->get_const_values()), m->get_stride(),
as_hip_type(f->get_const_values()), f->get_stride(),
as_hip_type(c->get_values()), c->get_stride(),
as_hip_type(stop_status->get_const_data()));
stop_status->get_const_data());
}


Expand Down Expand Up @@ -168,7 +168,7 @@ void update_g_and_u(std::shared_ptr<const HipExecutor> exec,
multidot_kernel, grid_dim, block_dim, 0, 0, size, nrhs,
as_hip_type(p_i), as_hip_type(g_k->get_values()),
g_k->get_stride(), as_hip_type(alpha->get_values()),
as_hip_type(stop_status->get_const_data()));
stop_status->get_const_data());
} else {
hipblas::dot(exec->get_hipblas_handle(), size, p_i, 1,
g_k->get_values(), g_k->get_stride(),
Expand All @@ -183,14 +183,14 @@ void update_g_and_u(std::shared_ptr<const HipExecutor> exec,
as_hip_type(g->get_const_values()), g->get_stride(),
as_hip_type(g_k->get_values()), g_k->get_stride(),
as_hip_type(u->get_values()), u->get_stride(),
as_hip_type(stop_status->get_const_data()));
stop_status->get_const_data());
}
hipLaunchKernelGGL(update_g_kernel<default_block_size>,
ceildiv(size * g_k->get_stride(), default_block_size),
default_block_size, 0, 0, k, size, nrhs,
as_hip_type(g_k->get_const_values()), g_k->get_stride(),
as_hip_type(g->get_values()), g->get_stride(),
as_hip_type(stop_status->get_const_data()));
stop_status->get_const_data());
}


Expand Down Expand Up @@ -221,7 +221,7 @@ void update_m(std::shared_ptr<const HipExecutor> exec, const size_type nrhs,
nrhs, as_hip_type(p_i),
as_hip_type(g_k->get_const_values()),
g_k->get_stride(), as_hip_type(m_i),
as_hip_type(stop_status->get_const_data()));
stop_status->get_const_data());
} else {
hipblas::dot(exec->get_hipblas_handle(), size, p_i, 1,
g_k->get_const_values(), g_k->get_stride(), m_i);
Expand Down Expand Up @@ -252,7 +252,7 @@ void update_x_r_and_f(std::shared_ptr<const HipExecutor> exec,
as_hip_type(f->get_values()), f->get_stride(),
as_hip_type(r->get_values()), r->get_stride(),
as_hip_type(x->get_values()), x->get_stride(),
as_hip_type(stop_status->get_const_data()));
stop_status->get_const_data());
components::fill_array(exec, f->get_values() + k * f->get_stride(), nrhs,
zero<ValueType>());
}
Expand Down Expand Up @@ -297,7 +297,7 @@ void step_1(std::shared_ptr<const HipExecutor> exec, const size_type nrhs,
residual->get_stride(), as_hip_type(c->get_const_values()),
c->get_stride(), as_hip_type(g->get_const_values()), g->get_stride(),
as_hip_type(v->get_values()), v->get_stride(),
as_hip_type(stop_status->get_const_data()));
stop_status->get_const_data());
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL);
Expand All @@ -324,7 +324,7 @@ void step_2(std::shared_ptr<const HipExecutor> exec, const size_type nrhs,
as_hip_type(preconditioned_vector->get_const_values()),
preconditioned_vector->get_stride(), as_hip_type(c->get_const_values()),
c->get_stride(), as_hip_type(u->get_values()), u->get_stride(),
as_hip_type(stop_status->get_const_data()));
stop_status->get_const_data());
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL);
Expand Down Expand Up @@ -355,12 +355,11 @@ void compute_omega(
matrix::Dense<ValueType>* omega, const array<stopping_status>* stop_status)
{
const auto grid_dim = ceildiv(nrhs, config::warp_size);
hipLaunchKernelGGL(HIP_KERNEL_NAME(compute_omega_kernel), grid_dim,
config::warp_size, 0, 0, nrhs, as_hip_type(kappa),
as_hip_type(tht->get_const_values()),
as_hip_type(residual_norm->get_const_values()),
as_hip_type(omega->get_values()),
as_hip_type(stop_status->get_const_data()));
hipLaunchKernelGGL(
HIP_KERNEL_NAME(compute_omega_kernel), grid_dim, config::warp_size, 0,
0, nrhs, as_hip_type(kappa), as_hip_type(tht->get_const_values()),
as_hip_type(residual_norm->get_const_values()),
as_hip_type(omega->get_values()), stop_status->get_const_data());
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_COMPUTE_OMEGA_KERNEL);
Expand Down
13 changes: 7 additions & 6 deletions hip/stop/residual_norm_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,12 +143,13 @@ constexpr int default_block_size = 512;

template <typename ValueType>
__global__
__launch_bounds__(default_block_size) void implicit_residual_norm_kernel(
size_type num_cols, remove_complex<ValueType> rel_residual_goal,
const ValueType* __restrict__ tau,
const remove_complex<ValueType>* __restrict__ orig_tau, uint8 stoppingId,
bool setFinalized, stopping_status* __restrict__ stop_status,
bool* __restrict__ device_storage)
__launch_bounds__(default_block_size) void implicit_residual_norm_kernel(
size_type num_cols, remove_complex<ValueType> rel_residual_goal,
const ValueType* __restrict__ tau,
const remove_complex<ValueType>* __restrict__ orig_tau,
uint8 stoppingId, bool setFinalized,
stopping_status* __restrict__ stop_status,
bool* __restrict__ device_storage)
{
const auto tidx = thread::get_thread_id_flat();
if (tidx < num_cols) {
Expand Down

0 comments on commit d7b41c7

Please sign in to comment.