From 79448a5275d8a5e9752e4f6301bb38b0f880a284 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Vysko=C4=8Dil?= Date: Tue, 16 Jan 2024 18:00:35 +0100 Subject: [PATCH] Remove CudaVectorArrayWrapper, use Vec for Philox counters. --- .../counterBasedRng/src/counterBasedRng.cpp | 19 +- example/randomCells2D/src/randomCells2D.cpp | 40 +-- .../randomStrategies/src/randomStrategies.cpp | 17 +- include/alpaka/alpaka.hpp | 1 - .../alpaka/meta/CudaVectorArrayWrapper.hpp | 329 ------------------ include/alpaka/meta/IsArrayOrVector.hpp | 15 +- .../alpaka/rand/Philox/PhiloxBaseCommon.hpp | 15 +- .../rand/Philox/PhiloxBaseCudaArray.hpp | 65 ---- .../alpaka/rand/Philox/PhiloxBaseStdArray.hpp | 26 -- .../alpaka/rand/Philox/PhiloxBaseTraits.hpp | 93 ----- .../alpaka/rand/Philox/PhiloxConstants.hpp | 18 +- include/alpaka/rand/Philox/PhiloxSingle.hpp | 34 +- .../alpaka/rand/Philox/PhiloxStateless.hpp | 10 +- .../rand/Philox/PhiloxStatelessKeyedBase.hpp | 13 +- .../rand/Philox/PhiloxStatelessVector.hpp | 26 -- include/alpaka/rand/Philox/PhiloxVector.hpp | 28 +- include/alpaka/rand/RandDefault.hpp | 8 +- include/alpaka/rand/RandPhilox.hpp | 30 +- include/alpaka/rand/RandPhiloxStateless.hpp | 8 +- .../alpaka/rand/RandUniformCudaHipRand.hpp | 8 +- include/alpaka/vec/Vec.hpp | 1 + .../meta/src/CudaVectorArrayWrapperTest.cpp | 167 --------- test/unit/meta/src/IsArrayOrVectorTest.cpp | 17 +- test/unit/rand/src/PhiloxTest.cpp | 187 ++++++++++ test/unit/vec/src/VecTest.cpp | 3 +- 25 files changed, 327 insertions(+), 851 deletions(-) delete mode 100644 include/alpaka/meta/CudaVectorArrayWrapper.hpp delete mode 100644 include/alpaka/rand/Philox/PhiloxBaseCudaArray.hpp delete mode 100644 include/alpaka/rand/Philox/PhiloxBaseStdArray.hpp delete mode 100644 include/alpaka/rand/Philox/PhiloxBaseTraits.hpp delete mode 100644 include/alpaka/rand/Philox/PhiloxStatelessVector.hpp delete mode 100644 test/unit/meta/src/CudaVectorArrayWrapperTest.cpp create mode 100644 test/unit/rand/src/PhiloxTest.cpp diff --git a/example/counterBasedRng/src/counterBasedRng.cpp b/example/counterBasedRng/src/counterBasedRng.cpp index ec92f885d783..bfa9a35873df 100644 --- a/example/counterBasedRng/src/counterBasedRng.cpp +++ b/example/counterBasedRng/src/counterBasedRng.cpp @@ -17,12 +17,9 @@ class CounterBasedRngKernel public: template using Vec = alpaka::Vec, alpaka::Idx>; - template - using Gen = typename alpaka::rand::PhiloxStateless4x32x10Vector; - template - using Key = typename Gen::Key; - template - using Counter = typename Gen::Counter; + using Gen = typename alpaka::rand::PhiloxStateless4x32x10Vector; + using Key = typename Gen::Key; + using Counter = typename Gen::Counter; template using Mdspan = alpaka::experimental::MdSpan, alpaka::Dim>; @@ -36,7 +33,7 @@ class CounterBasedRngKernel static ALPAKA_FN_ACC auto elemLoop( TAcc const& acc, Mdspan dst, - Key const& key, + Key const& key, Vec const& threadElemExtent, Vec& threadFirstElemIdx) -> void { @@ -56,14 +53,14 @@ class CounterBasedRngKernel } else { - Counter c = {0, 0, 0, 0}; + Counter c = {0, 0, 0, 0}; for(unsigned int i = 0; i < Dim; ++i) c[i] = threadFirstElemIdx[i]; for(; threadFirstElemIdx[Dim - 1] < threadLastElemIdxClipped; ++threadFirstElemIdx[Dim - 1]) { c[Dim - 1] = threadFirstElemIdx[Dim - 1]; - auto const random = Gen::generate(c, key); + auto const random = Gen::generate(c, key); // to make use of the whole random vector we would need to ensure numElement[0] % 4 == 0 dst(alpaka::toArray(threadFirstElemIdx)) = TElem(random[0]); } @@ -82,7 +79,7 @@ class CounterBasedRngKernel //! \param extent The matrix dimension in elements. ALPAKA_NO_HOST_ACC_WARNING template - ALPAKA_FN_ACC auto operator()(TAcc const& acc, Mdspan dst, Key const& key) const -> void + ALPAKA_FN_ACC auto operator()(TAcc const& acc, Mdspan dst, Key const& key) const -> void { constexpr auto Dim = alpaka::Dim::value; static_assert(Dim <= 4, "The CounterBasedRngKernel expects at most 4-dimensional indices!"); @@ -166,7 +163,7 @@ auto main() -> int Data* const pBufHostDev(alpaka::getPtrNative(bufHostDev)); std::random_device rd{}; - CounterBasedRngKernel::Key key = {rd(), rd()}; + CounterBasedRngKernel::Key key = {rd(), rd()}; // Allocate buffer on the accelerator using BufAcc = alpaka::Buf; diff --git a/example/randomCells2D/src/randomCells2D.cpp b/example/randomCells2D/src/randomCells2D.cpp index 7b96f7954507..f2a6687cf4b5 100644 --- a/example/randomCells2D/src/randomCells2D.cpp +++ b/example/randomCells2D/src/randomCells2D.cpp @@ -15,16 +15,14 @@ constexpr unsigned NUM_X = 127; constexpr unsigned NUM_Y = 211; /// Selected PRNG engine for single-value operation -template -using RandomEngineSingle = alpaka::rand::Philox4x32x10; +using RandomEngineSingle = alpaka::rand::Philox4x32x10; // using RandomEngineSingle = alpaka::rand::engine::uniform_cuda_hip::Xor; // using RandomEngineSingle = alpaka::rand::engine::cpu::MersenneTwister; // using RandomEngineSingle = alpaka::rand::engine::cpu::TinyMersenneTwister; /// Selected PRNG engine for vector operation -template -using RandomEngineVector = alpaka::rand::Philox4x32x10Vector; +using RandomEngineVector = alpaka::rand::Philox4x32x10Vector; /** Get a pointer to the correct location of `TElement array` taking pitch into account. * @@ -71,7 +69,7 @@ struct RunTimestepKernelSingle ALPAKA_FN_ACC auto operator()( TAcc const& acc, TExtent const extent, - RandomEngineSingle* const states, + RandomEngineSingle* const states, float* const cells, std::size_t pitchRand, std::size_t pitchOut) const -> void @@ -84,7 +82,7 @@ struct RunTimestepKernelSingle auto cellsOut = pitchedPointer2D(cells, pitchOut, idx); // Setup generator and distribution. - RandomEngineSingle engine(*statesOut); + RandomEngineSingle engine(*statesOut); alpaka::rand::UniformReal dist; float sum = 0; @@ -104,7 +102,7 @@ struct RunTimestepKernelVector ALPAKA_FN_ACC auto operator()( TAcc const& acc, TExtent const extent, - RandomEngineVector* const states, + RandomEngineVector* const states, float* const cells, std::size_t pitchRand, std::size_t pitchOut) const -> void @@ -117,10 +115,10 @@ struct RunTimestepKernelVector auto cellsOut = pitchedPointer2D(cells, pitchOut, idx); // Setup generator and distribution. - RandomEngineVector engine(*statesOut); // Load the state of the random engine + RandomEngineVector engine(*statesOut); // Load the state of the random engine using DistributionResult = - typename RandomEngineVector::template ResultContainer; // Container type which will store - // the distribution results + typename RandomEngineVector::template ResultContainer; // Container type which will store + // the distribution results constexpr unsigned resultVectorSize = std::tuple_size_v; // Size of the result vector alpaka::rand::UniformReal dist; // Vector-aware distribution function @@ -160,16 +158,16 @@ auto main() -> int using BufHost = alpaka::Buf; using BufAcc = alpaka::Buf; - using BufHostRand = alpaka::Buf, Dim, Idx>; - using BufAccRand = alpaka::Buf, Dim, Idx>; - using BufHostRandVec = alpaka::Buf, Dim, Idx>; - using BufAccRandVec = alpaka::Buf, Dim, Idx>; + using BufHostRand = alpaka::Buf; + using BufAccRand = alpaka::Buf; + using BufHostRandVec = alpaka::Buf; + using BufAccRandVec = alpaka::Buf; using WorkDiv = alpaka::WorkDivMembers; constexpr Idx numX = NUM_X; constexpr Idx numY = NUM_Y; - const Vec extent(numY, numX); + Vec const extent(numY, numX); constexpr Idx perThreadX = 1; constexpr Idx perThreadY = 1; @@ -192,13 +190,13 @@ auto main() -> int BufAcc bufAccV{alpaka::allocBuf(devAcc, extent)}; float* const ptrBufAccV{alpaka::getPtrNative(bufAccV)}; - BufHostRand bufHostRandS{alpaka::allocBuf, Idx>(devHost, extent)}; - BufAccRand bufAccRandS{alpaka::allocBuf, Idx>(devAcc, extent)}; - RandomEngineSingle* const ptrBufAccRandS{alpaka::getPtrNative(bufAccRandS)}; + BufHostRand bufHostRandS{alpaka::allocBuf(devHost, extent)}; + BufAccRand bufAccRandS{alpaka::allocBuf(devAcc, extent)}; + RandomEngineSingle* const ptrBufAccRandS{alpaka::getPtrNative(bufAccRandS)}; - BufHostRandVec bufHostRandV{alpaka::allocBuf, Idx>(devHost, extent)}; - BufAccRandVec bufAccRandV{alpaka::allocBuf, Idx>(devAcc, extent)}; - RandomEngineVector* const ptrBufAccRandV{alpaka::getPtrNative(bufAccRandV)}; + BufHostRandVec bufHostRandV{alpaka::allocBuf(devHost, extent)}; + BufAccRandVec bufAccRandV{alpaka::allocBuf(devAcc, extent)}; + RandomEngineVector* const ptrBufAccRandV{alpaka::getPtrNative(bufAccRandV)}; InitRandomKernel initRandomKernel; auto pitchBufAccRandS = alpaka::getPitchesInBytes(bufAccRandS)[0]; diff --git a/example/randomStrategies/src/randomStrategies.cpp b/example/randomStrategies/src/randomStrategies.cpp index 84d2b54303bd..301906165536 100644 --- a/example/randomStrategies/src/randomStrategies.cpp +++ b/example/randomStrategies/src/randomStrategies.cpp @@ -17,8 +17,7 @@ constexpr unsigned NUM_ROLLS = 2000; ///< Amount of random number "dice rolls" p /// Selected PRNG engine // Comment the current "using" line, and uncomment a different one to change the PRNG engine -template -using RandomEngine = alpaka::rand::Philox4x32x10; +using RandomEngine = alpaka::rand::Philox4x32x10; // using RandomEngine = alpaka::rand::engine::cpu::MersenneTwister; // using RandomEngine = alpaka::rand::engine::cpu::TinyMersenneTwister; @@ -45,8 +44,8 @@ struct Box QueueAcc queue; ///< default accelerator queue // buffers holding the PRNG states - using BufHostRand = alpaka::Buf, Dim, Idx>; - using BufAccRand = alpaka::Buf, Dim, Idx>; + using BufHostRand = alpaka::Buf; + using BufAccRand = alpaka::Buf; Vec const extentRand; ///< size of the buffer of PRNG states WorkDiv workdivRand; ///< work division for PRNG buffer initialization @@ -71,8 +70,8 @@ struct Box Vec(Idx{1}), false, alpaka::GridBlockExtentSubDivRestrictions::Unrestricted)} - , bufHostRand{alpaka::allocBuf, Idx>(alpaka::getDevByIdx(hostPlatform, 0), extentRand)} - , bufAccRand{alpaka::allocBuf, Idx>(alpaka::getDevByIdx(accPlatform, 0), extentRand)} + , bufHostRand{alpaka::allocBuf(alpaka::getDevByIdx(hostPlatform, 0), extentRand)} + , bufAccRand{alpaka::allocBuf(alpaka::getDevByIdx(accPlatform, 0), extentRand)} , extentResult{static_cast((NUM_POINTS * NUM_ROLLS))} // Store all "rolls" for each "point" , workdivResult{alpaka::getValidWorkDiv( alpaka::getDevByIdx(accPlatform, 0), @@ -167,7 +166,7 @@ struct FillKernel ALPAKA_FN_ACC auto operator()( TAcc const& acc, ///< current accelerator TExtent const extent, ///< size of the results buffer - RandomEngine* const states, ///< PRNG states buffer + RandomEngine* const states, ///< PRNG states buffer float* const cells ///< results buffer ) const -> void { @@ -180,7 +179,7 @@ struct FillKernel auto const numWorkers = alpaka::math::min(acc, numGridThreads, static_cast(NUM_POINTS)); - RandomEngine engine(states[idx]); // Setup the PRNG using the saved state for this thread. + RandomEngine engine(states[idx]); // Setup the PRNG using the saved state for this thread. alpaka::rand::UniformReal dist; // Setup the random number distribution for(uint32_t i = idx; i < extent[0]; i += numWorkers) { @@ -245,7 +244,7 @@ template void runStrategy(Box& box) { // Set up the pointer to the PRNG states buffer - RandomEngine* const ptrBufAccRand{alpaka::getPtrNative(box.bufAccRand)}; + RandomEngine* const ptrBufAccRand{alpaka::getPtrNative(box.bufAccRand)}; // Initialize the PRNG and its states on the device InitRandomKernel initRandomKernel; diff --git a/include/alpaka/alpaka.hpp b/include/alpaka/alpaka.hpp index 5ea5816316be..f4253f5ef97c 100644 --- a/include/alpaka/alpaka.hpp +++ b/include/alpaka/alpaka.hpp @@ -157,7 +157,6 @@ #include "alpaka/meta/Apply.hpp" #include "alpaka/meta/CartesianProduct.hpp" #include "alpaka/meta/Concatenate.hpp" -#include "alpaka/meta/CudaVectorArrayWrapper.hpp" #include "alpaka/meta/DependentFalseType.hpp" #include "alpaka/meta/Filter.hpp" #include "alpaka/meta/Fold.hpp" diff --git a/include/alpaka/meta/CudaVectorArrayWrapper.hpp b/include/alpaka/meta/CudaVectorArrayWrapper.hpp deleted file mode 100644 index 57010ef27669..000000000000 --- a/include/alpaka/meta/CudaVectorArrayWrapper.hpp +++ /dev/null @@ -1,329 +0,0 @@ -/* Copyright 2022 Jiří Vyskočil, Jan Stephan, Bernhard Manfred Gruber - * SPDX-License-Identifier: MPL-2.0 - */ - -#pragma once - -#include "alpaka/core/Common.hpp" - -#include -#include -#include -#include - -#if defined(ALPAKA_ACC_GPU_HIP_ENABLED) || defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - -# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED -# include -# endif - -# ifdef ALPAKA_ACC_GPU_HIP_ENABLED -# include -# endif - -namespace alpaka::meta -{ - namespace detail - { - template - struct CudaVectorArrayTypeTraits; - - template<> - struct CudaVectorArrayTypeTraits - { - using type = float1; - }; - - template<> - struct CudaVectorArrayTypeTraits - { - using type = float2; - }; - - template<> - struct CudaVectorArrayTypeTraits - { - using type = float3; - }; - - template<> - struct CudaVectorArrayTypeTraits - { - using type = float4; - }; - - template<> - struct CudaVectorArrayTypeTraits - { - using type = double1; - }; - - template<> - struct CudaVectorArrayTypeTraits - { - using type = double2; - }; - - template<> - struct CudaVectorArrayTypeTraits - { - using type = double3; - }; - - template<> - struct CudaVectorArrayTypeTraits - { - using type = double4; - }; - - template<> - struct CudaVectorArrayTypeTraits - { - using type = uint1; - }; - - template<> - struct CudaVectorArrayTypeTraits - { - using type = uint2; - }; - - template<> - struct CudaVectorArrayTypeTraits - { - using type = uint3; - }; - - template<> - struct CudaVectorArrayTypeTraits - { - using type = uint4; - }; - - template<> - struct CudaVectorArrayTypeTraits - { - using type = int1; - }; - - template<> - struct CudaVectorArrayTypeTraits - { - using type = int2; - }; - - template<> - struct CudaVectorArrayTypeTraits - { - using type = int3; - }; - - template<> - struct CudaVectorArrayTypeTraits - { - using type = int4; - }; - } // namespace detail - - /// Helper struct providing [] subscript access to CUDA vector types - template - struct CudaVectorArrayWrapper; - - template - struct CudaVectorArrayWrapper : public detail::CudaVectorArrayTypeTraits::type - { - using value_type = TScalar; - static constexpr unsigned size = 4; - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE CudaVectorArrayWrapper(std::initializer_list init) - { - auto it = std::begin(init); - this->x = *it++; - this->y = *it++; - this->z = *it++; - this->w = *it++; - } - - template - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE CudaVectorArrayWrapper(Other const& o) - { - static_assert(std::tuple_size_v == size, "Can only convert between vectors of same size."); - static_assert( - std::is_same_v, - "Can only convert between vectors of same element type."); - this->x = o[0]; - this->y = o[1]; - this->z = o[2]; - this->w = o[3]; - } - - ALPAKA_FN_HOST_ACC constexpr operator std::array() const - { - std::array ret; - ret[0] = this->x; - ret[1] = this->y; - ret[2] = this->z; - ret[3] = this->w; - return ret; - } - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr value_type& operator[](int const k) noexcept - { - assert(k >= 0 && k < 4); - return k == 0 ? this->x : (k == 1 ? this->y : (k == 2 ? this->z : this->w)); - } - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr value_type const& operator[](int const k) const noexcept - { - assert(k >= 0 && k < 4); - return k == 0 ? this->x : (k == 1 ? this->y : (k == 2 ? this->z : this->w)); - } - }; - - template - struct CudaVectorArrayWrapper : public detail::CudaVectorArrayTypeTraits::type - { - using value_type = TScalar; - static constexpr unsigned size = 3; - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE CudaVectorArrayWrapper(std::initializer_list init) - { - auto it = std::begin(init); - this->x = *it++; - this->y = *it++; - this->z = *it++; - } - - template - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE CudaVectorArrayWrapper(Other const& o) - { - static_assert(std::tuple_size::value == size, "Can only convert between vectors of same size."); - static_assert( - std::is_same::value, - "Can only convert between vectors of same element type."); - this->x = o[0]; - this->y = o[1]; - this->z = o[2]; - } - - ALPAKA_FN_HOST_ACC constexpr operator std::array() const - { - std::array ret; - ret[0] = this->x; - ret[1] = this->y; - ret[2] = this->z; - return ret; - } - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr value_type& operator[](int const k) noexcept - { - assert(k >= 0 && k < 3); - return k == 0 ? this->x : (k == 1 ? this->y : this->z); - } - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr value_type const& operator[](int const k) const noexcept - { - assert(k >= 0 && k < 3); - return k == 0 ? this->x : (k == 1 ? this->y : this->z); - } - }; - - template - struct CudaVectorArrayWrapper : public detail::CudaVectorArrayTypeTraits::type - { - using value_type = TScalar; - static constexpr unsigned size = 2; - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE CudaVectorArrayWrapper(std::initializer_list init) - { - auto it = std::begin(init); - this->x = *it++; - this->y = *it++; - } - - template - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE CudaVectorArrayWrapper(Other const& o) - { - static_assert(std::tuple_size::value == size, "Can only convert between vectors of same size."); - static_assert( - std::is_same::value, - "Can only convert between vectors of same element type."); - this->x = o[0]; - this->y = o[1]; - } - - ALPAKA_FN_HOST_ACC constexpr operator std::array() const - { - std::array ret; - ret[0] = this->x; - ret[1] = this->y; - return ret; - } - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr value_type& operator[](int const k) noexcept - { - assert(k >= 0 && k < 2); - return k == 0 ? this->x : this->y; - } - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr value_type const& operator[](int const k) const noexcept - { - assert(k >= 0 && k < 2); - return k == 0 ? this->x : this->y; - } - }; - - template - struct CudaVectorArrayWrapper : public detail::CudaVectorArrayTypeTraits::type - { - using value_type = TScalar; - static constexpr unsigned size = 1; - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE CudaVectorArrayWrapper(std::initializer_list init) - { - auto it = std::begin(init); - this->x = *it; - } - - template - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE CudaVectorArrayWrapper(Other const& o) - { - static_assert(std::tuple_size::value == size, "Can only convert between vectors of same size."); - static_assert( - std::is_same::value, - "Can only convert between vectors of same element type."); - this->x = o[0]; - } - - ALPAKA_FN_HOST_ACC constexpr operator std::array() const - { - std::array ret; - ret[0] = this->x; - return ret; - } - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr value_type& operator[]([[maybe_unused]] int const k) noexcept - { - assert(k == 0); - return this->x; - } - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr value_type const& operator[]( - [[maybe_unused]] int const k) const noexcept - { - assert(k == 0); - return this->x; - } - }; -} // namespace alpaka::meta - -namespace std -{ - /// Specialization of std::tuple_size for \a float4_array - template - struct tuple_size> : integral_constant - { - }; -} // namespace std - -#endif diff --git a/include/alpaka/meta/IsArrayOrVector.hpp b/include/alpaka/meta/IsArrayOrVector.hpp index ae94636224a8..f755916d8351 100644 --- a/include/alpaka/meta/IsArrayOrVector.hpp +++ b/include/alpaka/meta/IsArrayOrVector.hpp @@ -4,7 +4,7 @@ #pragma once -#include "alpaka/meta/CudaVectorArrayWrapper.hpp" +#include "alpaka/vec/Vec.hpp" #include #include @@ -52,11 +52,14 @@ namespace alpaka::meta { }; -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED) - /// Specialization of \a IsArrayOrVector for CUDA vector array wrapper - template - struct IsArrayOrVector> : std::true_type + /** Specialization of \a IsArrayOrVector for alpaka::Vec + * + * @tparam T inner type held in the array + * @tparam N size of the array + */ + template + struct IsArrayOrVector> : std::true_type { }; -#endif + } // namespace alpaka::meta diff --git a/include/alpaka/rand/Philox/PhiloxBaseCommon.hpp b/include/alpaka/rand/Philox/PhiloxBaseCommon.hpp index 3e790fabed6c..e80d8a1e571c 100644 --- a/include/alpaka/rand/Philox/PhiloxBaseCommon.hpp +++ b/include/alpaka/rand/Philox/PhiloxBaseCommon.hpp @@ -14,7 +14,6 @@ namespace alpaka::rand::engine * * Relies on `PhiloxStateless` to provide the PRNG and adds state to handling the counting. * - * @tparam TBackend device-dependent backend, specifies the array types * @tparam TParams Philox algorithm parameters \sa PhiloxParams * @tparam TImpl engine type implementation (CRTP) * @@ -24,14 +23,16 @@ namespace alpaka::rand::engine * OpenMP <= 4.5 standard. In OpenMP >= 5.0 types with any kind of static * data member are mappable. */ - template - class PhiloxBaseCommon - : public TBackend - , public PhiloxStateless + template + class PhiloxBaseCommon : public PhiloxStateless { public: - using Counter = typename PhiloxStateless::Counter; - using Key = typename PhiloxStateless::Key; + using Counter = typename PhiloxStateless::Counter; + using Key = typename PhiloxStateless::Key; + + /// Distribution container type + template + using ResultContainer = typename alpaka::Vec, TDistributionResultScalar>; protected: /** Advance the \a counter to the next state diff --git a/include/alpaka/rand/Philox/PhiloxBaseCudaArray.hpp b/include/alpaka/rand/Philox/PhiloxBaseCudaArray.hpp deleted file mode 100644 index c2f8c9c8deec..000000000000 --- a/include/alpaka/rand/Philox/PhiloxBaseCudaArray.hpp +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright 2022 Jiri Vyskocil - * SPDX-License-Identifier: MPL-2.0 - */ - -#pragma once - -#include "alpaka/meta/CudaVectorArrayWrapper.hpp" - -#if defined(ALPAKA_ACC_GPU_HIP_ENABLED) || defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - -namespace alpaka::rand::engine -{ - namespace trait - { - template - struct PhiloxResultContainerTraits; - - template<> - struct PhiloxResultContainerTraits - { - using type = meta::CudaVectorArrayWrapper; - }; - - template<> - struct PhiloxResultContainerTraits - { - using type = meta::CudaVectorArrayWrapper; - }; - - template<> - struct PhiloxResultContainerTraits - { - using type = meta::CudaVectorArrayWrapper; - }; - - template<> - struct PhiloxResultContainerTraits - { - using type = meta::CudaVectorArrayWrapper; - }; - - template - using PhiloxResultContainer = typename PhiloxResultContainerTraits::type; - } // namespace trait - - /** Philox backend using array-like interface to CUDA uintN types for the storage of Key and Counter - * - * @tparam TParams Philox algorithm parameters \sa PhiloxParams - */ - template - class PhiloxBaseCudaArray - { - static_assert(TParams::counterSize == 4, "GPU Philox implemented only for counters of width == 4"); - - public: - using Counter - = meta::CudaVectorArrayWrapper; ///< Counter type = array-like interface to CUDA uint4 - using Key = meta::CudaVectorArrayWrapper; ///< Key type = array-like interface to CUDA uint2 - template - using ResultContainer = trait::PhiloxResultContainer; ///< Vector template for - ///< distribution results - }; -} // namespace alpaka::rand::engine - -#endif diff --git a/include/alpaka/rand/Philox/PhiloxBaseStdArray.hpp b/include/alpaka/rand/Philox/PhiloxBaseStdArray.hpp deleted file mode 100644 index 53d3df565487..000000000000 --- a/include/alpaka/rand/Philox/PhiloxBaseStdArray.hpp +++ /dev/null @@ -1,26 +0,0 @@ -/* Copyright 2022 Jiri Vyskocil, Bernhard Manfred Gruber - * SPDX-License-Identifier: MPL-2.0 - */ - -#pragma once - -#include -#include - -namespace alpaka::rand::engine -{ - /** Philox backend using std::array for Key and Counter storage - * - * @tparam TParams Philox algorithm parameters \sa PhiloxParams - */ - template - class PhiloxBaseStdArray - { - public: - using Counter = std::array; ///< Counter type = std::array - using Key = std::array; ///< Key type = std::array - template - using ResultContainer - = std::array; ///< Vector template for distribution results - }; -} // namespace alpaka::rand::engine diff --git a/include/alpaka/rand/Philox/PhiloxBaseTraits.hpp b/include/alpaka/rand/Philox/PhiloxBaseTraits.hpp deleted file mode 100644 index 8c782fb93f9a..000000000000 --- a/include/alpaka/rand/Philox/PhiloxBaseTraits.hpp +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright 2022 Jiří Vyskočil, Bernhard Manfred Gruber, Jeffrey Kelling, Jan Stephan - * SPDX-License-Identifier: MPL-2.0 - */ - -#pragma once - -#include "alpaka/rand/Philox/PhiloxBaseCommon.hpp" -#include "alpaka/rand/Philox/PhiloxBaseCudaArray.hpp" -#include "alpaka/rand/Philox/PhiloxBaseStdArray.hpp" -#include "alpaka/rand/Philox/PhiloxStateless.hpp" -#include "alpaka/rand/Philox/PhiloxStatelessKeyedBase.hpp" - -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED) -namespace alpaka -{ - template - class AccGpuUniformCudaHipRt; -} // namespace alpaka -#endif - -namespace alpaka::rand::engine::trait -{ - template - inline constexpr bool isGPU = false; - -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED) - template - inline constexpr bool isGPU> = true; -#endif - - /** Selection of default backend - * - * Selects the data backend based on the accelerator device type. As of now, different backends operate - * on different array types. - * - * @tparam TAcc the accelerator as defined in alpaka/acc - * @tparam TParams Philox algorithm parameters - * @tparam TSfinae internal parameter to stop substitution search and provide the default - */ - template - struct PhiloxStatelessBaseTraits - { - // template -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED) - using Backend = std::conditional_t, PhiloxBaseCudaArray, PhiloxBaseStdArray>; -#else - using Backend = PhiloxBaseStdArray; -#endif - using Counter = typename Backend::Counter; ///< Counter array type - using Key = typename Backend::Key; ///< Key array type - template - using ResultContainer = - typename Backend::template ResultContainer; ///< Distribution - ///< container type - /// Base type to be inherited from by stateless keyed engine - using Base = PhiloxStateless; - }; - - /** Selection of default backend - * - * Selects the data backend based on the accelerator device type. As of now, different backends operate - * on different array types. - * - * @tparam TAcc the accelerator as defined in alpaka/acc - * @tparam TParams Philox algorithm parameters - * @tparam TSfinae internal parameter to stop substitution search and provide the default - */ - template - struct PhiloxStatelessKeyedBaseTraits : public PhiloxStatelessBaseTraits - { - using Backend = typename PhiloxStatelessBaseTraits::Backend; - /// Base type to be inherited from by counting engines - using Base = PhiloxStatelessKeyedBase; - }; - - /** Selection of default backend - * - * Selects the data backend based on the accelerator device type. As of now, different backends operate - * on different array types. - * - * @tparam TAcc the accelerator as defined in alpaka/acc - * @tparam TParams Philox algorithm parameters - * @tparam TImpl engine type implementation (CRTP) - * @tparam TSfinae internal parameter to stop substitution search and provide the default - */ - template - struct PhiloxBaseTraits : public PhiloxStatelessBaseTraits - { - using Backend = typename PhiloxStatelessBaseTraits::Backend; - /// Base type to be inherited from by counting engines - using Base = PhiloxBaseCommon; - }; -} // namespace alpaka::rand::engine::trait diff --git a/include/alpaka/rand/Philox/PhiloxConstants.hpp b/include/alpaka/rand/Philox/PhiloxConstants.hpp index f6000d6ceff8..831a1de3a6a3 100644 --- a/include/alpaka/rand/Philox/PhiloxConstants.hpp +++ b/include/alpaka/rand/Philox/PhiloxConstants.hpp @@ -31,34 +31,40 @@ namespace alpaka::rand::engine class PhiloxConstants { public: + /// First Weyl sequence parameter: the golden ratio static constexpr std::uint64_t WEYL_64_0() { - return 0x9E37'79B9'7F4A'7C15; ///< First Weyl sequence parameter: the golden ratio + return 0x9E37'79B9'7F4A'7C15; } + /// Second Weyl sequence parameter: \f$ \sqrt{3}-1 \f$ static constexpr std::uint64_t WEYL_64_1() { - return 0xBB67'AE85'84CA'A73B; ///< Second Weyl sequence parameter: \f$ \sqrt{3}-1 \f$ + return 0xBB67'AE85'84CA'A73B; } + /// 1st Weyl sequence parameter, 32 bits static constexpr std::uint32_t WEYL_32_0() { - return high32Bits(WEYL_64_0()); ///< 1st Weyl sequence parameter, 32 bits + return high32Bits(WEYL_64_0()); } + /// 2nd Weyl sequence parameter, 32 bits static constexpr std::uint32_t WEYL_32_1() { - return high32Bits(WEYL_64_1()); ///< 2nd Weyl sequence parameter, 32 bits + return high32Bits(WEYL_64_1()); } + /// First Philox S-box multiplier static constexpr std::uint32_t MULTIPLITER_4x32_0() { - return 0xCD9E'8D57; ///< First Philox S-box multiplier + return 0xCD9E'8D57; } + /// Second Philox S-box multiplier static constexpr std::uint32_t MULTIPLITER_4x32_1() { - return 0xD251'1F53; ///< Second Philox S-box multiplier + return 0xD251'1F53; } }; } // namespace alpaka::rand::engine diff --git a/include/alpaka/rand/Philox/PhiloxSingle.hpp b/include/alpaka/rand/Philox/PhiloxSingle.hpp index 4cccae7e11b3..3f7b6ffc08d4 100644 --- a/include/alpaka/rand/Philox/PhiloxSingle.hpp +++ b/include/alpaka/rand/Philox/PhiloxSingle.hpp @@ -5,7 +5,7 @@ #pragma once #include "alpaka/rand/Philox/MultiplyAndSplit64to32.hpp" -#include "alpaka/rand/Philox/PhiloxBaseTraits.hpp" +#include "alpaka/rand/Philox/PhiloxBaseCommon.hpp" #include @@ -22,10 +22,14 @@ namespace alpaka::rand::engine using Counter = TCounter; using Key = TKey; - Counter counter; ///< Counter array - Key key; ///< Key array - Counter result; ///< Intermediate result array - std::uint32_t position; ///< Pointer to the active intermediate result element + /// Counter array + Counter counter; + /// Key array + Key key; + /// Intermediate result array + Counter result; + /// Pointer to the active intermediate result element + std::uint32_t position; // TODO: Box-Muller states }; @@ -36,21 +40,23 @@ namespace alpaka::rand::engine * operator(). Additionally a pointer has to be stored indicating which part of the result array is to be * returned next. * - * @tparam TAcc Accelerator type as defined in alpaka/acc * @tparam TParams Basic parameters for the Philox algorithm */ - template - class PhiloxSingle : public trait::PhiloxBaseTraits>::Base + template + class PhiloxSingle : public PhiloxBaseCommon> { public: - /// Specialization for different TAcc backends - using Traits = typename trait::PhiloxBaseTraits>; + using Base = PhiloxBaseCommon>; - using Counter = typename Traits::Counter; ///< Backend-dependent Counter type - using Key = typename Traits::Key; ///< Backend-dependent Key type - using State = PhiloxStateSingle; ///< Backend-dependent State type + /// Counter type + using Counter = typename Base::Counter; + /// Key type + using Key = typename Base::Key; + /// State type + using State = PhiloxStateSingle; - State state; ///< Internal engine state + /// Internal engine state + State state; protected: /** Advance internal counter to the next value diff --git a/include/alpaka/rand/Philox/PhiloxStateless.hpp b/include/alpaka/rand/Philox/PhiloxStateless.hpp index 094b9d3de117..3011d446c3d3 100644 --- a/include/alpaka/rand/Philox/PhiloxStateless.hpp +++ b/include/alpaka/rand/Philox/PhiloxStateless.hpp @@ -7,6 +7,7 @@ #include "alpaka/core/Unroll.hpp" #include "alpaka/rand/Philox/MultiplyAndSplit64to32.hpp" #include "alpaka/rand/Philox/PhiloxConstants.hpp" +#include "alpaka/vec/Vec.hpp" #include @@ -28,13 +29,12 @@ namespace alpaka::rand::engine /** Class basic Philox family counter-based PRNG * - * Checks the validity of passed-in parameters and calls the \a TBackend methods to perform N rounds of the + * Checks the validity of passed-in parameters and calls the backend methods to perform N rounds of the * Philox shuffle. * - * @tparam TBackend device-dependent backend, specifies the array types * @tparam TParams Philox algorithm parameters \sa PhiloxParams */ - template + template class PhiloxStateless : public PhiloxConstants { static constexpr unsigned numRounds() @@ -60,8 +60,8 @@ namespace alpaka::rand::engine static_assert(numberWidth() == 32, "Philox implemented only for 32 bit numbers."); public: - using Counter = typename TBackend::Counter; - using Key = typename TBackend::Key; + using Counter = alpaka::Vec, std::uint32_t>; + using Key = alpaka::Vec, std::uint32_t>; using Constants = PhiloxConstants; protected: diff --git a/include/alpaka/rand/Philox/PhiloxStatelessKeyedBase.hpp b/include/alpaka/rand/Philox/PhiloxStatelessKeyedBase.hpp index c997ec0e95b8..bb6795b7c8a0 100644 --- a/include/alpaka/rand/Philox/PhiloxStatelessKeyedBase.hpp +++ b/include/alpaka/rand/Philox/PhiloxStatelessKeyedBase.hpp @@ -10,20 +10,19 @@ namespace alpaka::rand::engine { /** Common class for Philox family engines * - * Checks the validity of passed-in parameters and calls the \a TBackend methods to perform N rounds of the + * Checks the validity of passed-in parameters and calls the backend methods to perform N rounds of the * Philox shuffle. * - * @tparam TBackend device-dependent backend, specifies the array types * @tparam TParams Philox algorithm parameters \sa PhiloxParams */ - template - struct PhiloxStatelessKeyedBase : public PhiloxStateless + template + struct PhiloxStatelessKeyedBase : public PhiloxStateless { public: - using Counter = typename PhiloxStateless::Counter; - using Key = typename PhiloxStateless::Key; + using Counter = typename PhiloxStateless::Counter; + using Key = typename PhiloxStateless::Key; - const Key m_key; + Key const m_key; PhiloxStatelessKeyedBase(Key&& key) : m_key(std::move(key)) { diff --git a/include/alpaka/rand/Philox/PhiloxStatelessVector.hpp b/include/alpaka/rand/Philox/PhiloxStatelessVector.hpp deleted file mode 100644 index 49a7fa1b1923..000000000000 --- a/include/alpaka/rand/Philox/PhiloxStatelessVector.hpp +++ /dev/null @@ -1,26 +0,0 @@ -/* Copyright 2022 Jeffrey Kelling - * SPDX-License-Identifier: MPL-2.0 - */ - -#pragma once - -#include "alpaka/rand/Philox/PhiloxBaseTraits.hpp" - -#include - -namespace alpaka::rand::engine -{ - /** Philox-stateless engine generating a vector of numbers - * - * This engine's operator() will return a vector of numbers corresponding to the full size of its counter. - * This is a convenience vs. memory size tradeoff since the user has to deal with the output array - * themselves, but the internal state comprises only of a single counter and a key. - * - * @tparam TAcc Accelerator type as defined in alpaka/acc - * @tparam TParams Basic parameters for the Philox algorithm - */ - template - class PhiloxStatelessVector : public trait::PhiloxStatelessBaseTraits::Base - { - }; -} // namespace alpaka::rand::engine diff --git a/include/alpaka/rand/Philox/PhiloxVector.hpp b/include/alpaka/rand/Philox/PhiloxVector.hpp index 648399caa022..64c89b44d899 100644 --- a/include/alpaka/rand/Philox/PhiloxVector.hpp +++ b/include/alpaka/rand/Philox/PhiloxVector.hpp @@ -5,7 +5,7 @@ #pragma once #include "alpaka/rand/Philox/MultiplyAndSplit64to32.hpp" -#include "alpaka/rand/Philox/PhiloxBaseTraits.hpp" +#include "alpaka/rand/Philox/PhiloxBaseCommon.hpp" #include @@ -22,8 +22,10 @@ namespace alpaka::rand::engine using Counter = TCounter; using Key = TKey; - Counter counter; ///< Counter array - Key key; ///< Key array + /// Counter array + Counter counter; + /// Key array + Key key; }; /** Philox engine generating a vector of numbers @@ -32,21 +34,23 @@ namespace alpaka::rand::engine * This is a convenience vs. memory size tradeoff since the user has to deal with the output array * themselves, but the internal state comprises only of a single counter and a key. * - * @tparam TAcc Accelerator type as defined in alpaka/acc * @tparam TParams Basic parameters for the Philox algorithm */ - template - class PhiloxVector : public trait::PhiloxBaseTraits>::Base + template + class PhiloxVector : public PhiloxBaseCommon> { public: - /// Specialization for different TAcc backends - using Traits = trait::PhiloxBaseTraits>; + using Base = PhiloxBaseCommon>; + + /// Counter type + using Counter = typename Base::Counter; + /// Key type + using Key = typename Base::Key; + /// State type + using State = PhiloxStateVector; - using Counter = typename Traits::Counter; ///< Backend-dependent Counter type - using Key = typename Traits::Key; ///< Backend-dependent Key type - using State = PhiloxStateVector; ///< Backend-dependent State type template - using ResultContainer = typename Traits::template ResultContainer; + using ResultContainer = typename Base::template ResultContainer; State state; diff --git a/include/alpaka/rand/RandDefault.hpp b/include/alpaka/rand/RandDefault.hpp index 6cb1701810b2..bbe763c67f67 100644 --- a/include/alpaka/rand/RandDefault.hpp +++ b/include/alpaka/rand/RandDefault.hpp @@ -79,7 +79,7 @@ namespace alpaka::rand ALPAKA_FN_HOST_ACC auto operator()(TEngine& engine) -> T { constexpr BitsT limit = static_cast(1) << std::numeric_limits::digits; - const BitsT b = UniformUint()(engine); + BitsT const b = UniformUint()(engine); auto const ret = static_cast(b & (limit - 1)) / limit; return ret; } @@ -147,7 +147,7 @@ namespace alpaka::rand } while(u1 <= std::numeric_limits::epsilon()); // compute z0 and z1 - const T mag = sigma * math::sqrt(*m_acc, static_cast(-2.) * math::log(*m_acc, u1)); + T const mag = sigma * math::sqrt(*m_acc, static_cast(-2.) * math::log(*m_acc, u1)); constexpr T twoPi = static_cast(2. * math::constants::pi); // getting two normal number out of this, store one for later m_cache = mag * static_cast(math::cos(*m_acc, twoPi * u2)) + mu; @@ -155,7 +155,7 @@ namespace alpaka::rand return mag * static_cast(math::sin(*m_acc, twoPi * u2)) + mu; } - const T ret = m_cache; + T const ret = m_cache; m_cache = std::numeric_limits::quiet_NaN(); return ret; } @@ -207,7 +207,7 @@ namespace alpaka::rand TAcc const& /* acc */, std::uint32_t const& seed, std::uint32_t const& subsequence, - std::uint32_t const& offset) -> Philox4x32x10 + std::uint32_t const& offset) -> Philox4x32x10 { return {seed, subsequence, offset}; } diff --git a/include/alpaka/rand/RandPhilox.hpp b/include/alpaka/rand/RandPhilox.hpp index 72cf99b297b0..d11cacb6e97d 100644 --- a/include/alpaka/rand/RandPhilox.hpp +++ b/include/alpaka/rand/RandPhilox.hpp @@ -27,15 +27,14 @@ namespace alpaka::rand * Ref.: J. K. Salmon, M. A. Moraes, R. O. Dror and D. E. Shaw, "Parallel random numbers: As easy as 1, 2, 3," * SC '11: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and * Analysis, 2011, pp. 1-12, doi: 10.1145/2063384.2063405. - * - * @tparam TAcc Accelerator type as defined in alpaka/acc */ - template - class Philox4x32x10 : public concepts::Implements> + class Philox4x32x10 : public concepts::Implements { public: - using EngineParams = engine::PhiloxParams<4, 32, 10>; ///< Philox algorithm: 10 rounds, 4 numbers of size 32. - using EngineVariant = engine::PhiloxSingle; ///< Engine outputs a single number + /// Philox algorithm: 10 rounds, 4 numbers of size 32. + using EngineParams = engine::PhiloxParams<4, 32, 10>; + /// Engine outputs a single number + using EngineVariant = engine::PhiloxSingle; /** Initialize a new Philox engine * @@ -84,15 +83,12 @@ namespace alpaka::rand * Ref.: J. K. Salmon, M. A. Moraes, R. O. Dror and D. E. Shaw, "Parallel random numbers: As easy as 1, 2, 3," * SC '11: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and * Analysis, 2011, pp. 1-12, doi: 10.1145/2063384.2063405. - * - * @tparam TAcc Accelerator type as defined in alpaka/acc */ - template - class Philox4x32x10Vector : public concepts::Implements> + class Philox4x32x10Vector : public concepts::Implements { public: using EngineParams = engine::PhiloxParams<4, 32, 10>; - using EngineVariant = engine::PhiloxVector; + using EngineVariant = engine::PhiloxVector; /** Initialize a new Philox engine * @@ -178,7 +174,7 @@ namespace alpaka::rand if constexpr(meta::IsArrayOrVector::value) { auto result = engine(); - T scale = static_cast(1) / engine.max() * _range; + T scale = static_cast(1) / static_cast(engine.max()) * _range; TResult ret{ static_cast(result[0]) * scale + _min, static_cast(result[1]) * scale + _min, @@ -189,15 +185,17 @@ namespace alpaka::rand else { // Since it's possible to get a host-only engine here, the call has to go through proxy - return static_cast(EngineCallHostAccProxy{}(engine)) / engine.max() * _range + _min; + return static_cast(EngineCallHostAccProxy{}(engine)) / static_cast(engine.max()) + * _range + + _min; } ALPAKA_UNREACHABLE(TResult{}); } private: - const T _min; - const T _max; - const T _range; + T const _min; + T const _max; + T const _range; }; } // namespace alpaka::rand diff --git a/include/alpaka/rand/RandPhiloxStateless.hpp b/include/alpaka/rand/RandPhiloxStateless.hpp index b7480f6f79b7..b2530d130ee1 100644 --- a/include/alpaka/rand/RandPhiloxStateless.hpp +++ b/include/alpaka/rand/RandPhiloxStateless.hpp @@ -5,7 +5,6 @@ #pragma once #include "alpaka/rand/Philox/PhiloxStateless.hpp" -#include "alpaka/rand/Philox/PhiloxStatelessVector.hpp" #include "alpaka/rand/Traits.hpp" namespace alpaka::rand @@ -20,13 +19,10 @@ namespace alpaka::rand * Ref.: J. K. Salmon, M. A. Moraes, R. O. Dror and D. E. Shaw, "Parallel random numbers: As easy as 1, 2, 3," * SC '11: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and * Analysis, 2011, pp. 1-12, doi: 10.1145/2063384.2063405. - * - * @tparam TAcc Accelerator type as defined in alpaka/acc */ - template class PhiloxStateless4x32x10Vector - : public alpaka::rand::engine::PhiloxStatelessVector> - , public concepts::Implements> + : public alpaka::rand::engine::PhiloxStateless> + , public concepts::Implements { public: using EngineParams = engine::PhiloxParams<4, 32, 10>; diff --git a/include/alpaka/rand/RandUniformCudaHipRand.hpp b/include/alpaka/rand/RandUniformCudaHipRand.hpp index 97090be3ed6d..63ffea909e25 100644 --- a/include/alpaka/rand/RandUniformCudaHipRand.hpp +++ b/include/alpaka/rand/RandUniformCudaHipRand.hpp @@ -231,7 +231,7 @@ namespace alpaka::rand template struct CreateNormalReal, T, std::enable_if_t>> { - __device__ static auto createNormalReal(RandUniformCudaHipRand const& /*rand*/) + static __device__ auto createNormalReal(RandUniformCudaHipRand const& /*rand*/) -> uniform_cuda_hip::NormalReal { return {}; @@ -242,7 +242,7 @@ namespace alpaka::rand template struct CreateUniformReal, T, std::enable_if_t>> { - __device__ static auto createUniformReal(RandUniformCudaHipRand const& /*rand*/) + static __device__ auto createUniformReal(RandUniformCudaHipRand const& /*rand*/) -> uniform_cuda_hip::UniformReal { return {}; @@ -253,7 +253,7 @@ namespace alpaka::rand template struct CreateUniformUint, T, std::enable_if_t>> { - __device__ static auto createUniformUint(RandUniformCudaHipRand const& /*rand*/) + static __device__ auto createUniformUint(RandUniformCudaHipRand const& /*rand*/) -> uniform_cuda_hip::UniformUint { return {}; @@ -267,7 +267,7 @@ namespace alpaka::rand template struct CreateDefault> { - __device__ static auto createDefault( + static __device__ auto createDefault( RandUniformCudaHipRand const& /*rand*/, std::uint32_t const& seed = 0, std::uint32_t const& subsequence = 0, diff --git a/include/alpaka/vec/Vec.hpp b/include/alpaka/vec/Vec.hpp index 54b5eeb547d9..45b5f02a46b9 100644 --- a/include/alpaka/vec/Vec.hpp +++ b/include/alpaka/vec/Vec.hpp @@ -41,6 +41,7 @@ namespace alpaka using Dim = TDim; using Val = TVal; + using value_type = Val; //!< STL-like value_type. private: //! A sequence of integers from 0 to dim-1. diff --git a/test/unit/meta/src/CudaVectorArrayWrapperTest.cpp b/test/unit/meta/src/CudaVectorArrayWrapperTest.cpp deleted file mode 100644 index d325fc838045..000000000000 --- a/test/unit/meta/src/CudaVectorArrayWrapperTest.cpp +++ /dev/null @@ -1,167 +0,0 @@ -/* Copyright 2022 Jiří Vyskočil, Jan Stephan - * SPDX-License-Identifier: MPL-2.0 - */ - - -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED) - -# include -# include -# include -# include -# include -# include - -# include -# include - -# include - -/* The tests here use equals for comparing float values for exact equality. This is not - * an issue of arithmetics. We are testing whether the values saved in a container are the same as the ones retrieved - * from it afterwards. In this case, returning a value that would not be exactly but only approximately equal to the - * one that was stored in the container would be a grave error. - */ -template -ALPAKA_FN_INLINE ALPAKA_FN_HOST_ACC bool equals(T1 a, T2 b) -{ - return a == static_cast(b); -} - -template<> -ALPAKA_FN_INLINE ALPAKA_FN_HOST_ACC bool equals(float a, float b) -{ - return alpaka::math::floatEqualExactNoWarning(a, b); -} - -template<> -ALPAKA_FN_INLINE ALPAKA_FN_HOST_ACC bool equals(double a, double b) -{ - return alpaka::math::floatEqualExactNoWarning(a, b); -} - -template -class CudaVectorArrayWrapperTestKernel -{ -public: - ALPAKA_NO_HOST_ACC_WARNING - template - ALPAKA_FN_ACC auto operator()(TAcc const& /* acc */, bool* success) const -> void - { - using T1 = alpaka::meta::CudaVectorArrayWrapper; - T1 t1{0}; - static_assert(T1::size == 1, "CudaVectorArrayWrapper in-kernel size test failed!"); - static_assert(std::tuple_size_v == 1, "CudaVectorArrayWrapper in-kernel tuple_size test failed!"); - static_assert(std::is_same_v, "CudaVectorArrayWrapper in-kernel type test failed!"); - ALPAKA_CHECK(*success, equals(t1[0], T{0})); - - using T2 = alpaka::meta::CudaVectorArrayWrapper; - T2 t2{0, 1}; - static_assert(T2::size == 2, "CudaVectorArrayWrapper in-kernel size test failed!"); - static_assert(std::tuple_size_v == 2, "CudaVectorArrayWrapper in-kernel tuple_size test failed!"); - static_assert(std::is_same_v, "CudaVectorArrayWrapper in-kernel type test failed!"); - ALPAKA_CHECK(*success, equals(t2[0], T{0})); - ALPAKA_CHECK(*success, equals(t2[1], T{1})); - - using T3 = alpaka::meta::CudaVectorArrayWrapper; - T3 t3{0, 0, 0}; - t3 = {0, 1, 2}; - static_assert(T3::size == 3, "CudaVectorArrayWrapper in-kernel size test failed!"); - static_assert(std::tuple_size_v == 3, "CudaVectorArrayWrapper in-kernel tuple_size test failed!"); - static_assert(std::is_same_v, "CudaVectorArrayWrapper in-kernel type test failed!"); - ALPAKA_CHECK(*success, equals(t3[0], T{0})); - ALPAKA_CHECK(*success, equals(t3[1], T{1})); - ALPAKA_CHECK(*success, equals(t3[2], T{2})); - - using T4 = alpaka::meta::CudaVectorArrayWrapper; - T4 t4{0, 0, 0, 0}; - t4[1] = 1; - t4[2] = t4[1] + 1; - t4[3] = t4[2] + t2[1]; - static_assert(T4::size == 4, "CudaVectorArrayWrapper in-kernel size test failed!"); - static_assert(std::tuple_size_v == 4, "CudaVectorArrayWrapper in-kernel tuple_size test failed!"); - static_assert(std::is_same_v, "CudaVectorArrayWrapper in-kernel type test failed!"); - ALPAKA_CHECK(*success, equals(t4[0], T{0})); - ALPAKA_CHECK(*success, equals(t4[1], T{1})); - ALPAKA_CHECK(*success, equals(t4[2], T{2})); - ALPAKA_CHECK(*success, equals(t4[3], T{3})); - } -}; - -TEMPLATE_LIST_TEST_CASE("cudaVectorArrayWrapperDevice", "[meta]", alpaka::test::TestAccs) -{ - using Acc = TestType; - using Dim = alpaka::Dim; - using Idx = alpaka::Idx; - - alpaka::test::KernelExecutionFixture fixture(alpaka::Vec::ones()); - - CudaVectorArrayWrapperTestKernel kernelInt; - REQUIRE(fixture(kernelInt)); - - CudaVectorArrayWrapperTestKernel kernelUnsigned; - REQUIRE(fixture(kernelUnsigned)); - - CudaVectorArrayWrapperTestKernel kernelFloat; - REQUIRE(fixture(kernelFloat)); - - CudaVectorArrayWrapperTestKernel kernelDouble; - REQUIRE(fixture(kernelDouble)); -} - -TEST_CASE("cudaVectorArrayWrapperHost", "[meta]") -{ - // TODO: It would be nice to check all possible type vs. size combinations. - - using Float1 = alpaka::meta::CudaVectorArrayWrapper; - Float1 floatWrapper1{-1.0f}; - STATIC_REQUIRE(Float1::size == 1); - STATIC_REQUIRE(std::tuple_size_v == 1); - STATIC_REQUIRE(std::is_same_v); - STATIC_REQUIRE(alpaka::meta::IsStrictBase::value); - REQUIRE(equals(floatWrapper1[0], -1.0f)); - - using Int1 = alpaka::meta::CudaVectorArrayWrapper; - Int1 intWrapper1 = {-42}; - STATIC_REQUIRE(Int1::size == 1); - STATIC_REQUIRE(std::tuple_size_v == 1); - STATIC_REQUIRE(std::is_same_v); - STATIC_REQUIRE(alpaka::meta::IsStrictBase::value); - REQUIRE(intWrapper1[0] == -42); - - using Uint2 = alpaka::meta::CudaVectorArrayWrapper; - Uint2 uintWrapper2{0u, 1u}; - STATIC_REQUIRE(Uint2::size == 2); - STATIC_REQUIRE(std::tuple_size_v == 2); - STATIC_REQUIRE(std::is_same_v); - STATIC_REQUIRE(alpaka::meta::IsStrictBase::value); - REQUIRE(uintWrapper2[0] == 0u); - REQUIRE(uintWrapper2[1] == 1u); - - using Uint4 = alpaka::meta::CudaVectorArrayWrapper; - Uint4 uintWrapper4{0u, 0u, 0u, 0u}; - STATIC_REQUIRE(Uint4::size == 4); - STATIC_REQUIRE(std::tuple_size_v == 4); - STATIC_REQUIRE(std::is_same_v); - STATIC_REQUIRE(alpaka::meta::IsStrictBase::value); - uintWrapper4[1] = 1u; - uintWrapper4[2] = uintWrapper4[1] + 1u; - uintWrapper4[3] = uintWrapper4[2] + uintWrapper2[1]; - REQUIRE(uintWrapper4[0] == 0u); - REQUIRE(uintWrapper4[1] == 1u); - REQUIRE(uintWrapper4[2] == 2u); - REQUIRE(uintWrapper4[3] == 3u); - - using Double3 = alpaka::meta::CudaVectorArrayWrapper; - Double3 doubleWrapper3{0.0, 0.0, 0.0}; - doubleWrapper3 = {0.0, -1.0, -2.0}; - STATIC_REQUIRE(Double3::size == 3); - STATIC_REQUIRE(std::tuple_size_v == 3); - STATIC_REQUIRE(std::is_same_v); - STATIC_REQUIRE(alpaka::meta::IsStrictBase::value); - REQUIRE(equals(doubleWrapper3[0], 0.0)); - REQUIRE(equals(doubleWrapper3[1], -1.0)); - REQUIRE(equals(doubleWrapper3[2], -2.0)); -} - -#endif diff --git a/test/unit/meta/src/IsArrayOrVectorTest.cpp b/test/unit/meta/src/IsArrayOrVectorTest.cpp index 9558afda4c9d..dc511d66ec90 100644 --- a/test/unit/meta/src/IsArrayOrVectorTest.cpp +++ b/test/unit/meta/src/IsArrayOrVectorTest.cpp @@ -3,6 +3,7 @@ */ #include +#include #include @@ -14,6 +15,7 @@ TEST_CASE("isArrayOrVector", "[meta]") { STATIC_REQUIRE(alpaka::meta::IsArrayOrVector>::value); STATIC_REQUIRE(alpaka::meta::IsArrayOrVector>::value); + STATIC_REQUIRE(alpaka::meta::IsArrayOrVector, float>>::value); [[maybe_unused]] float arrayFloat[4] = {1.0f, 2.0f, 3.0f, 4.0f}; STATIC_REQUIRE(alpaka::meta::IsArrayOrVector::value); @@ -30,18 +32,3 @@ TEST_CASE("isActuallyNotArrayOrVector", "[meta]") std::string notAnArrayString{"alpaka"}; STATIC_REQUIRE_FALSE(alpaka::meta::IsArrayOrVector::value); } - -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED) -TEST_CASE("isArrayOrVectorCudaWrappers", "[meta]") -{ - STATIC_REQUIRE(alpaka::meta::IsArrayOrVector>::value); - STATIC_REQUIRE(alpaka::meta::IsArrayOrVector>::value); - STATIC_REQUIRE(alpaka::meta::IsArrayOrVector>::value); - STATIC_REQUIRE(alpaka::meta::IsArrayOrVector>::value); -} - -TEST_CASE("isNotArrayOrVectorCudaVector", "[meta]") -{ - STATIC_REQUIRE_FALSE(alpaka::meta::IsArrayOrVector::value); -} -#endif diff --git a/test/unit/rand/src/PhiloxTest.cpp b/test/unit/rand/src/PhiloxTest.cpp new file mode 100644 index 000000000000..833ab4732e0e --- /dev/null +++ b/test/unit/rand/src/PhiloxTest.cpp @@ -0,0 +1,187 @@ +/* Copyright 2024 Jiri Vyskocil + * SPDX-License-Identifier: MPL-2.0 + */ + +#include +#include +#include +#include + +#include +#include + +class PhiloxTest +{ +protected: + alpaka::rand::Philox4x32x10 statefulSingleEngine; + alpaka::rand::Philox4x32x10Vector statefulVectorEngine; +}; + +TEST_CASE_METHOD(PhiloxTest, "HostStatefulVectorEngineTest") +{ + auto const resultVec = statefulVectorEngine(); + for(auto& result : resultVec) + { + REQUIRE(result >= statefulVectorEngine.min()); + REQUIRE(result <= statefulVectorEngine.max()); + } +} + +TEST_CASE_METHOD(PhiloxTest, "HostStatefulSingleEngineTest") +{ + auto const result = statefulSingleEngine(); + REQUIRE(result >= statefulSingleEngine.min()); + REQUIRE(result <= statefulSingleEngine.max()); +} + +TEST_CASE("HostStatelessEngineTest") +{ + using Gen = alpaka::rand::PhiloxStateless4x32x10Vector; + using Key = typename Gen::Key; + using Counter = typename Gen::Counter; + Key key = {42, 12345}; + Counter counter1 = {6789, 321, 0, 0}; + auto const result1 = Gen::generate(counter1, key); + Counter counter2 = {6789, 321, 0, 1}; + auto const result2 = Gen::generate(counter2, key); + // Make sure that the inputs are really expected to lead to different results. + REQUIRE(result1 != result2); +} + +template +class PhiloxTestKernelSingle +{ + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_ACC void genNumbers(TAcc const& acc, bool* success, T_Generator& gen) const + { + { + static_cast(acc); + alpaka::rand::UniformReal dist; + auto const result = dist(gen); + ALPAKA_CHECK(*success, static_cast(0.0) <= result); + ALPAKA_CHECK(*success, static_cast(1.0) > result); + } + } + +public: + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_ACC auto operator()(TAcc const& acc, bool* success) const -> void + { + // Philox generator for accelerator + auto generator = alpaka::rand::Philox4x32x10(42, 12345, 6789); + genNumbers(acc, success, generator); + } +}; + +template +class PhiloxTestKernelVector +{ + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_ACC void genNumbers(TAcc const& acc, bool* success, T_Generator& gen) const + { + { + static_cast(acc); + using DistributionResult = typename T_Generator::template ResultContainer; + alpaka::rand::UniformReal dist; + auto const result = dist(gen); + for(auto& element : result) + { + ALPAKA_CHECK(*success, static_cast(0.0) <= element); + ALPAKA_CHECK(*success, static_cast(1.0) > element); + } + } + } + +public: + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_ACC auto operator()(TAcc const& acc, bool* success) const -> void + { + // Philox generator for accelerator + auto generator = alpaka::rand::Philox4x32x10Vector(42, 12345, 6789); + genNumbers(acc, success, generator); + } +}; + +class PhiloxTestKernelStateless +{ + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_ACC void genNumbers(TAcc const& acc, bool* success) const + { + { + static_cast(acc); + + using Gen = alpaka::rand::PhiloxStateless4x32x10Vector; + using Key = typename Gen::Key; + using Counter = typename Gen::Counter; + + Key key = {42, 12345}; + Counter counter = {6789, 321, 0, 0}; + auto const result = Gen::generate(counter, key); + + size_t check = 0; + for(auto& element : result) + { + check += element; + } + // Make sure the sequence is not in fact supposed to generate {0,0,0,0}. + ALPAKA_CHECK(*success, check != 0); + } + } + +public: + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_ACC auto operator()(TAcc const& acc, bool* success) const -> void + { + genNumbers(acc, success); + } +}; + +TEMPLATE_LIST_TEST_CASE("PhiloxRandomGeneratorStatelessIsWorking", "[rand]", alpaka::test::TestAccs) +{ + using Acc = TestType; + using Idx = alpaka::Idx; + using Dim = alpaka::Dim; + + alpaka::test::KernelExecutionFixture fixture(alpaka::Vec::ones()); + + PhiloxTestKernelStateless kernel; + + REQUIRE(fixture(kernel)); +} + +using TestScalars = std::tuple; +using TestTypes = alpaka::meta::CartesianProduct; + +TEMPLATE_LIST_TEST_CASE("PhiloxRandomGeneratorSingleIsWorking", "[rand]", TestTypes) +{ + using Acc = std::tuple_element_t<0, TestType>; + using DataType = std::tuple_element_t<1, TestType>; + using Idx = alpaka::Idx; + using Dim = alpaka::Dim; + + alpaka::test::KernelExecutionFixture fixture(alpaka::Vec::ones()); + + PhiloxTestKernelSingle kernel; + + REQUIRE(fixture(kernel)); +} + +TEMPLATE_LIST_TEST_CASE("PhiloxRandomGeneratorVectorIsWorking", "[rand]", TestTypes) +{ + using Acc = std::tuple_element_t<0, TestType>; + using DataType = std::tuple_element_t<1, TestType>; + using Idx = alpaka::Idx; + using Dim = alpaka::Dim; + + alpaka::test::KernelExecutionFixture fixture(alpaka::Vec::ones()); + + PhiloxTestKernelVector kernel; + + REQUIRE(fixture(kernel)); +} diff --git a/test/unit/vec/src/VecTest.cpp b/test/unit/vec/src/VecTest.cpp index eb0679dc838d..c428307e96a9 100644 --- a/test/unit/vec/src/VecTest.cpp +++ b/test/unit/vec/src/VecTest.cpp @@ -483,7 +483,8 @@ TEMPLATE_TEST_CASE("Vec generator constructor", "[vec]", std::size_t, int, unsig // Floating point types require a precision check instead of an exact == match if constexpr(std::is_floating_point::value) { - auto const precision = std::numeric_limits::epsilon() * 5; // Arbitrary precision requirement + // Arbitrary precision requirement + auto const precision = std::numeric_limits::epsilon() * 5; CHECK(std::abs(vec[i] - static_cast(i + 1)) < precision); } else