diff --git a/example/matrixAddWithMdspan/src/matrixAddMdSpan.cpp b/example/matrixAddWithMdspan/src/matrixAddMdSpan.cpp index e80edfb0b22..d14910d90ef 100644 --- a/example/matrixAddWithMdspan/src/matrixAddMdSpan.cpp +++ b/example/matrixAddWithMdspan/src/matrixAddMdSpan.cpp @@ -37,16 +37,15 @@ struct MatrixAddKernel //! \param A First input matrix //! \param B Second input matrix //! \param C Output matrix where the result of A + B will be stored - template - ALPAKA_FN_ACC void operator()(TAcc const& acc, MdSpan A, MdSpan B, MdSpan C) const + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, TMdSpan A, TMdSpan B, TMdSpan C) const { // compile time check - static_assert(isMdspan::value, "The type MdSpan should be an std mdspan"); + static_assert(isMdspan::value, "The type MdSpan should be an std mdspan"); auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; - static_assert(isMdspan::value, "The type MdSpan should be an std mdspan"); if(i < A.extent(0) && j < A.extent(1)) { C(i, j) = A(i, j) + B(i, j); @@ -130,7 +129,6 @@ auto example(TAccTag const&) -> int auto mdDevB = alpaka::experimental::getMdSpan(bufDevB); auto mdDevC = alpaka::experimental::getMdSpan(bufDevC); - // Let alpaka calculate good block and grid sizes given our full problem extent. auto const workDiv = alpaka::getValidWorkDiv( devAcc, @@ -142,11 +140,9 @@ auto example(TAccTag const&) -> int // Execute the kernel alpaka::exec(queue, workDiv, MatrixAddKernel{}, mdDevA, mdDevB, mdDevC); - // Wait for the kernel to finish - alpaka::wait(queue); - // Copy result back to host alpaka::memcpy(queue, bufHostC, bufDevC); + // This wait is not necessary if the queue is a blocking queue alpaka::wait(queue); // Verify the result diff --git a/example/matrixMulWithMdspan/src/matrixMulMdSpan.cpp b/example/matrixMulWithMdspan/src/matrixMulMdSpan.cpp index 96b5b55333d..a19960e3842 100644 --- a/example/matrixMulWithMdspan/src/matrixMulMdSpan.cpp +++ b/example/matrixMulWithMdspan/src/matrixMulMdSpan.cpp @@ -43,11 +43,14 @@ struct MatrixMulKernel //! \param B Second input matrix //! \param C Output matrix where the result of A * B will be stored //! \param K The shared dimension between A and B - template - ALPAKA_FN_ACC void operator()(TAcc const& acc, MdSpan A, MdSpan B, MdSpan C, Idx K) const + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, TMdSpan A, TMdSpan B, TMdSpan C) const { - // compile time check - static_assert(isMdspan::value, "The type MdSpan should be an std mdspan"); + // compile time checks + static_assert(isMdspan::value, "The type MdSpan should be an std mdspan"); + + // A is MxK and B is KxN + auto const K = static_cast(A.extent(1)); auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; @@ -146,13 +149,11 @@ auto example(TAccTag const&) -> int alpaka::GridBlockExtentSubDivRestrictions::Unrestricted); // Execute the kernel - alpaka::exec(queue, workDiv, MatrixMulKernel{}, mdDevA, mdDevB, mdDevC, K); - - // Wait for the kernel to finish - alpaka::wait(queue); + alpaka::exec(queue, workDiv, MatrixMulKernel{}, mdDevA, mdDevB, mdDevC); // Copy result back to host alpaka::memcpy(queue, bufHostC, bufDevC); + // This wait is not necessary if the queue is a blocking queue alpaka::wait(queue); // Verify the result