Skip to content

Commit

Permalink
some refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
mehmetyusufoglu committed Jul 19, 2024
1 parent d8a8ca1 commit f874872
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 16 deletions.
12 changes: 4 additions & 8 deletions example/matrixAddWithMdspan/src/matrixAddMdSpan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,15 @@ struct MatrixAddKernel
//! \param A First input matrix
//! \param B Second input matrix
//! \param C Output matrix where the result of A + B will be stored
template<typename TAcc, typename MdSpan>
ALPAKA_FN_ACC void operator()(TAcc const& acc, MdSpan A, MdSpan B, MdSpan C) const
template<typename TAcc, typename TMdSpan>
ALPAKA_FN_ACC void operator()(TAcc const& acc, TMdSpan A, TMdSpan B, TMdSpan C) const
{
// compile time check
static_assert(isMdspan<MdSpan>::value, "The type MdSpan should be an std mdspan");
static_assert(isMdspan<TMdSpan>::value, "The type MdSpan should be an std mdspan");

auto const i = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
auto const j = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[1];

static_assert(isMdspan<MdSpan>::value, "The type MdSpan should be an std mdspan");
if(i < A.extent(0) && j < A.extent(1))
{
C(i, j) = A(i, j) + B(i, j);
Expand Down Expand Up @@ -130,7 +129,6 @@ auto example(TAccTag const&) -> int
auto mdDevB = alpaka::experimental::getMdSpan(bufDevB);
auto mdDevC = alpaka::experimental::getMdSpan(bufDevC);


// Let alpaka calculate good block and grid sizes given our full problem extent.
auto const workDiv = alpaka::getValidWorkDiv<Acc>(
devAcc,
Expand All @@ -142,11 +140,9 @@ auto example(TAccTag const&) -> int
// Execute the kernel
alpaka::exec<Acc>(queue, workDiv, MatrixAddKernel{}, mdDevA, mdDevB, mdDevC);

// Wait for the kernel to finish
alpaka::wait(queue);

// Copy result back to host
alpaka::memcpy(queue, bufHostC, bufDevC);
// Wait for the copy into bufHostC to finish before the host reads it; this wait is not necessary if the queue is a blocking queue
alpaka::wait(queue);

// Verify the result
Expand Down
17 changes: 9 additions & 8 deletions example/matrixMulWithMdspan/src/matrixMulMdSpan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,14 @@ struct MatrixMulKernel
//! \param B Second input matrix
//! \param C Output matrix where the result of A * B will be stored
//! \param K The shared dimension between A and B
template<typename TAcc, typename MdSpan>
ALPAKA_FN_ACC void operator()(TAcc const& acc, MdSpan A, MdSpan B, MdSpan C, Idx K) const
template<typename TAcc, typename TMdSpan>
ALPAKA_FN_ACC void operator()(TAcc const& acc, TMdSpan A, TMdSpan B, TMdSpan C) const
{
// compile time check
static_assert(isMdspan<MdSpan>::value, "The type MdSpan should be an std mdspan");
// compile time checks
static_assert(isMdspan<TMdSpan>::value, "The type MdSpan should be an std mdspan");

// A is MxK and B is KxN
auto const K = static_cast<Idx>(A.extent(1));

auto const i = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
auto const j = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[1];
Expand Down Expand Up @@ -146,13 +149,11 @@ auto example(TAccTag const&) -> int
alpaka::GridBlockExtentSubDivRestrictions::Unrestricted);

// Execute the kernel
alpaka::exec<Acc>(queue, workDiv, MatrixMulKernel{}, mdDevA, mdDevB, mdDevC, K);

// Wait for the kernel to finish
alpaka::wait(queue);
alpaka::exec<Acc>(queue, workDiv, MatrixMulKernel{}, mdDevA, mdDevB, mdDevC);

// Copy result back to host
alpaka::memcpy(queue, bufHostC, bufDevC);
// Wait for the copy into bufHostC to finish before the host reads it; this wait is not necessary if the queue is a blocking queue
alpaka::wait(queue);

// Verify the result
Expand Down

0 comments on commit f874872

Please sign in to comment.