Skip to content

Commit

Permalink
single remaining error
Browse files Browse the repository at this point in the history
  • Loading branch information
Mehmet Yusufoglu committed Mar 24, 2024
1 parent 57f7d51 commit e8dca57
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 75 deletions.
32 changes: 32 additions & 0 deletions include/alpaka/kernel/TaskKernelGpuUniformCudaHipRt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

#include "alpaka/acc/AccGpuUniformCudaHipRt.hpp"
#include "alpaka/acc/Traits.hpp"
#include "alpaka/core/ApiCudaRt.hpp"
#include "alpaka/core/ApiHipRt.hpp"
#include "alpaka/core/BoostPredef.hpp"
#include "alpaka/core/Cuda.hpp"
#include "alpaka/core/Decay.hpp"
Expand Down Expand Up @@ -390,7 +392,37 @@ namespace alpaka
}
}
};


} // namespace trait

// TODO(review): original note (translated from Turkish): "Put it here; if required, make it a trait."
//! Queries and prints the GPU function attributes of the device entry point that would be
//! generated for the given kernel function object.
//!
//! \tparam TApi The GPU runtime API wrapper (CUDA or HIP) used to query the attributes.
//! \tparam TAcc The accelerator type the kernel targets.
//! \tparam TDev The device type (currently unused).
//! \tparam TKernelFnObj The kernel function object type.
//! The work-division, blocking-flag and subdivision-restriction parameters are accepted for
//! interface compatibility but are currently unused.
// NOTE(review): the trailing TAcc template argument selects a gpuKernel instantiation whose
// kernel-argument pack is {TAcc}; confirm this matches the kernel actually launched.
template<
    typename TApi,
    typename TAcc,
    typename TDev,
    typename TKernelFnObj,
    typename TGridElemExtent = Vec<Dim<TAcc>, Idx<TAcc>>,
    typename TThreadElemExtent = Vec<Dim<TAcc>, Idx<TAcc>>>
ALPAKA_FN_HOST void getValidWorkDivForKernel(
    TDev const&,
    [[maybe_unused]] TKernelFnObj const& kernel,
    TGridElemExtent const& = Vec<Dim<TAcc>, Idx<TAcc>>::ones(),
    TThreadElemExtent const& = Vec<Dim<TAcc>, Idx<TAcc>>::ones(),
    bool = true,
    GridBlockExtentSubDivRestrictions = GridBlockExtentSubDivRestrictions::Unrestricted)
{
    // Select the compiled device entry point whose attributes we want to inspect.
    auto entryPoint = alpaka::detail::gpuKernel<TKernelFnObj, TApi, TAcc, Dim<TAcc>, Idx<TAcc>, TAcc>;

    // Ask the runtime API for the function attributes of that entry point.
    typename TApi::FuncAttributes_t attrs;
    ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::funcGetAttributes(&attrs, entryPoint));

    // Emit one log line listing the attributes, prefixed with this function's name.
    std::cout << __func__ << " binaryVersion: " << attrs.binaryVersion;
    std::cout << " constSizeBytes: " << attrs.constSizeBytes << " B";
    std::cout << " localSizeBytes: " << attrs.localSizeBytes << " B";
    std::cout << " maxThreadsPerBlock: " << attrs.maxThreadsPerBlock << " numRegs: " << attrs.numRegs;
    std::cout << " ptxVersion: " << attrs.ptxVersion << " sharedSizeBytes: " << attrs.sharedSizeBytes;
    std::cout << " B" << std::endl;
}
} // namespace alpaka

# endif
Expand Down
32 changes: 1 addition & 31 deletions include/alpaka/workdiv/WorkDivHelpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include "alpaka/core/Utility.hpp"
#include "alpaka/dev/Traits.hpp"
#include "alpaka/extent/Traits.hpp"
#include "alpaka/kernel/TaskKernelGpuUniformCudaHipRt.hpp"
// #include "alpaka/kernel/TaskKernelGpuUniformCudaHipRt.hpp"
#include "alpaka/vec/Vec.hpp"
#include "alpaka/workdiv/WorkDivMembers.hpp"

Expand Down Expand Up @@ -322,36 +322,6 @@ namespace alpaka
ALPAKA_UNREACHABLE(WorkDivMembers<Dim<TGridElemExtent>, Idx<TGridElemExtent>>{V{}, V{}, V{}});
}

//! Queries and prints the GPU function attributes of the device entry point generated for the
//! given kernel function object. Despite the name, the visible body does not enqueue anything.
//!
//! \tparam TApi The GPU runtime API wrapper (CUDA or HIP) used to query the attributes.
//! \tparam TAcc The accelerator type the kernel targets.
//! \tparam TDev The device type (currently unused).
//! \tparam TKernel The kernel function object type.
//! The work-division, blocking-flag and subdivision-restriction parameters are accepted for
//! interface compatibility but are currently unused.
template<
    typename TApi,
    typename TAcc,
    typename TDev,
    typename TKernel,
    typename TGridElemExtent = Vec<Dim<TAcc>, Idx<TAcc>>,
    typename TThreadElemExtent = Vec<Dim<TAcc>, Idx<TAcc>>>
ALPAKA_FN_HOST void enqueueWithValidWorkDiv(
    TDev const&,
    [[maybe_unused]] TKernel const& kernel,
    TGridElemExtent const& = Vec<Dim<TAcc>, Idx<TAcc>>::ones(),
    TThreadElemExtent const& = Vec<Dim<TAcc>, Idx<TAcc>>::ones(),
    bool = true,
    GridBlockExtentSubDivRestrictions = GridBlockExtentSubDivRestrictions::Unrestricted)
{
#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
    // Fix: the previous instantiation referenced TArgs, Dim and Idx, none of which are declared
    // in this template, so it could not compile. Use this function's own template parameters
    // instead (an empty kernel-argument pack).
    auto kernelName = alpaka::detail::gpuKernel<TKernel, TApi, TAcc, Dim<TAcc>, Idx<TAcc>>;
    // Log the function attributes reported by the runtime.
    typename TApi::FuncAttributes_t funcAttrs;
    ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::funcGetAttributes(&funcAttrs, kernelName));
    std::cout << __func__ << " binaryVersion: " << funcAttrs.binaryVersion
              << " constSizeBytes: " << funcAttrs.constSizeBytes << " B"
              << " localSizeBytes: " << funcAttrs.localSizeBytes << " B"
              << " maxThreadsPerBlock: " << funcAttrs.maxThreadsPerBlock << " numRegs: " << funcAttrs.numRegs
              << " ptxVersion: " << funcAttrs.ptxVersion << " sharedSizeBytes: " << funcAttrs.sharedSizeBytes
              << " B" << std::endl;
#endif
}

//! \tparam TDim The dimensionality of the accelerator device properties.
//! \tparam TIdx The idx type of the accelerator device properties.
//! \tparam TWorkDiv The type of the work division.
Expand Down
62 changes: 18 additions & 44 deletions test/unit/workDiv/src/WorkDivForKernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
#include <alpaka/acc/AccDevProps.hpp>
#include <alpaka/acc/AccGpuHipRt.hpp>
#include <alpaka/acc/AccGpuUniformCudaHipRt.hpp>
#include <alpaka/core/ApiCudaRt.hpp>
#include <alpaka/core/ApiHipRt.hpp>
// #include <alpaka/kernel/TaskKernelGpuUniformCudaHipRt.hpp>
#include <alpaka/kernel/TaskKernelGpuUniformCudaHipRt.hpp>
#include <alpaka/test/KernelExecutionFixture.hpp>
#include <alpaka/test/acc/TestAccs.hpp>
#include <alpaka/workdiv/WorkDivHelpers.hpp>
Expand All @@ -15,38 +16,6 @@
#include <catch2/catch_test_macros.hpp>

#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
namespace alpaka::detail
{
# if BOOST_COMP_CLANG
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wunused-template"
# endif
//! The GPU CUDA/HIP kernel entry point.
// \NOTE: 'A __global__ function or function template cannot have a trailing return type.'
// We have put the function into a shallow namespace and gave it a short name, so the mangled name in the
// profiler (e.g. ncu) is as short as possible.
//
// \tparam TKernelFnObj The user kernel function object type; its operator() must return void.
// \tparam TApi The GPU runtime API wrapper (CUDA or HIP).
// \tparam TAcc The accelerator type constructed per thread from the element extent.
// \tparam TArgs The user kernel argument types, passed by value to the device.
template<typename TKernelFnObj, typename TApi, typename TAcc, typename TDim, typename TIdx, typename... TArgs>
__global__ void gpuKernel(Vec<TDim, TIdx> const threadElemExtent, TKernelFnObj const kernelFnObj, TArgs... args)
{
# if BOOST_ARCH_PTX && (BOOST_ARCH_PTX < BOOST_VERSION_NUMBER(2, 0, 0))
# error "Device capability >= 2.0 is required!"
# endif

// Construct the accelerator facade for this thread from the thread-element extent.
TAcc const acc(threadElemExtent);

// with clang it is not possible to query std::result_of for a pure device lambda created on the host side
# if !(BOOST_COMP_CLANG_CUDA && BOOST_COMP_CLANG)
static_assert(
std::is_same_v<decltype(kernelFnObj(const_cast<TAcc const&>(acc), args...)), void>,
"The TKernelFnObj is required to return void!");
# endif
// Invoke the user kernel with the accelerator and the forwarded arguments.
kernelFnObj(const_cast<TAcc const&>(acc), args...);
}
# if BOOST_COMP_CLANG
# pragma clang diagnostic pop
# endif
} // namespace alpaka::detail
#endif

namespace
{
Expand Down Expand Up @@ -109,26 +78,31 @@ TEMPLATE_LIST_TEST_CASE("getValidWorkDivKernel", "[workDivKernel]", alpaka::test
std::ignore = getWorkDivKernel<Acc>();
}

// NOTE(review): this hunk appears to interleave both sides of a diff — two test-case headers,
// two conflicting sets of TApi aliases, `Acc` used before its `using` declaration, and
// unbalanced preprocessor guards. Reconcile to a single side before this can compile.
TEMPLATE_LIST_TEST_CASE("enqueueWithValidWorkDiv.1D.withIdx", "[workDivKernel]", alpaka::test::TestAccs)
TEMPLATE_LIST_TEST_CASE("getValidWorkDivForKernel.1D.withIdx", "[workDivKernel]", alpaka::test::TestAccs)
{
[[maybe_unused]] HelloWorldKernel kernel;
#if defined(ALPAKA_ACC_GPU_HIP_ENABLED) || defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
// NOTE(review): this first TApi alias block references `Acc` before it is declared below.
# if defined(ALPAKA_ACC_GPU_HIP_ENABLED)
using TApi = alpaka::ApiHipRt<alpaka::Dim<Acc>, alpaka::Idx<Acc>>;
# elif defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
using TApi = alpaka::ApiCudaRt<alpaka::Dim<Acc>, alpaka::Idx<Acc>>;
# endif
// Bind the Catch2 template parameter and derive its index and dimension types.
using Acc = TestType;
using Idx = alpaka::Idx<Acc>;
using Dim = alpaka::Dim<Acc>;
[[maybe_unused]] HelloWorldKernel kernel;
# if defined(ALPAKA_ACC_GPU_HIP_ENABLED) || defined(ALPAKA_ACC_GPU_CUDA_ENABLED)

// NOTE(review): this second TApi alias block (non-templated ApiCudaRt/ApiHipRt) conflicts with
// the first one above — only one of the two can remain.
# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
using TApi = alpaka::ApiCudaRt;
# endif

# if defined(ALPAKA_ACC_GPU_HIP_ENABLED)
using TApi = alpaka::ApiHipRt;
# endif

using Vec = alpaka::Vec<Dim, Idx>;
// Only the 1-dimensional accelerators are exercised by this test.
if constexpr(Dim::value == 1)
{
auto const platform = alpaka::Platform<Acc>{};
auto const dev = alpaka::getDevByIdx(platform, 0);
// test that we can call getValidWorkDiv with the Idx type directly instead of a Vec
alpaka::enqueueWithValidWorkDiv<TApi, Acc, decltype(dev), HelloWorldKernel>(dev, kernel, Vec{256}, Vec{13});
// CHECK(alpaka::enqueueWithValidWorkDiv<Acc>(dev, Idx{256}, Idx{13}));
alpaka::getValidWorkDivForKernel<TApi, Acc, decltype(dev), HelloWorldKernel>(dev, kernel, Vec{256}, Vec{13});
// CHECK(alpaka::getValidWorkDivForKernel<Acc>(dev, Idx{256}, Idx{13}));
}
#endif
# endif
}
#endif

0 comments on commit e8dca57

Please sign in to comment.