Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rename threadsPerGrid to elementsPerGrid #2362

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions example/bufferCopy/src/bufferCopy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ auto example(TAccTag const&) -> int
// Define the work division for kernels to be run on devAcc and devHost
using Vec = alpaka::Vec<Dim, Idx>;
Vec const elementsPerThread(Vec::all(static_cast<Idx>(1)));
Vec const threadsPerGrid(Vec::all(static_cast<Idx>(10)));
Vec const elementsPerGrid(Vec::all(static_cast<Idx>(10)));

// Create host and device buffers
//
Expand Down Expand Up @@ -164,7 +164,7 @@ auto example(TAccTag const&) -> int

FillBufferKernel fillBufferKernel;

alpaka::KernelCfg<Host> const hostKernelCfg = {threadsPerGrid, elementsPerThread};
alpaka::KernelCfg<Host> const hostKernelCfg = {elementsPerGrid, elementsPerThread};
auto const hostWorkDiv = alpaka::getValidWorkDiv(hostKernelCfg, devHost, fillBufferKernel, hostViewPlainPtrMdSpan);

alpaka::exec<Host>(hostQueue, hostWorkDiv, fillBufferKernel,
Expand Down Expand Up @@ -204,7 +204,7 @@ auto example(TAccTag const&) -> int
TestBufferKernel testBufferKernel;

// Let alpaka calculate good block and grid sizes given our full problem extent
alpaka::KernelCfg<Acc> const devKernelCfg = {threadsPerGrid, elementsPerThread};
alpaka::KernelCfg<Acc> const devKernelCfg = {elementsPerGrid, elementsPerThread};
auto const devWorkDiv = alpaka::getValidWorkDiv(devKernelCfg, devAcc, testBufferKernel, deviceBufferMdSpan1);

alpaka::exec<Acc>(devQueue, devWorkDiv, testBufferKernel, deviceBufferMdSpan1);
Expand Down
4 changes: 2 additions & 2 deletions example/complex/src/complex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ auto example(TAccTag const&) -> int
Queue queue(devAcc);

// Define the work division
Idx const threadsPerGrid = 1u;
Idx const elementsPerGrid = 1u;
Idx const elementsPerThread = 1u;

ComplexKernel complexKernel;

alpaka::KernelCfg<Acc> const kernelCfg = {threadsPerGrid, elementsPerThread};
alpaka::KernelCfg<Acc> const kernelCfg = {elementsPerGrid, elementsPerThread};

// Let alpaka calculate good block and grid sizes given our full problem extent
auto const workDiv = alpaka::getValidWorkDiv(kernelCfg, devAcc, complexKernel);
Expand Down
4 changes: 2 additions & 2 deletions example/convolution1D/src/convolution1D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ auto example(TAccTag const&) -> int

auto const elementsPerThread = Vec::all(static_cast<Idx>(1));
// Grid size
auto const threadsPerGrid = inputSize;
auto const elementsPerGrid = inputSize;

// Instantiate the kernel (gpu code) function-object
ConvolutionKernel convolutionKernel;
Expand All @@ -140,7 +140,7 @@ auto example(TAccTag const&) -> int
DataType* nativeInputDeviceMemory = std::data(inputDeviceMemory);
DataType* nativeOutputDeviceMemory = std::data(outputDeviceMemory);

alpaka::KernelCfg<DevAcc> const kernelCfg = {threadsPerGrid, elementsPerThread};
alpaka::KernelCfg<DevAcc> const kernelCfg = {elementsPerGrid, elementsPerThread};

// Let alpaka calculate good block and grid sizes given our full problem extent
auto const workDiv = alpaka::getValidWorkDiv(
Expand Down
4 changes: 2 additions & 2 deletions example/helloWorld/src/helloWorld.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ auto example(TAccTag const&) -> int
// vector processing unit.
using Vec = alpaka::Vec<Dim, Idx>;
auto const elementsPerThread = Vec::all(static_cast<Idx>(1));
auto const threadsPerGrid = Vec{4, 2, 4};
auto const elementsPerGrid = Vec{4, 2, 4};

// Instantiate the kernel function object
//
Expand All @@ -135,7 +135,7 @@ auto example(TAccTag const&) -> int
// argument. So a kernel can be a class or struct, a lambda, etc.
HelloWorldKernel helloWorldKernel;

alpaka::KernelCfg<Acc> const kernelCfg = {threadsPerGrid, elementsPerThread};
alpaka::KernelCfg<Acc> const kernelCfg = {elementsPerGrid, elementsPerThread};

// Let alpaka calculate good block and grid sizes given our full problem extent
auto const workDiv = alpaka::getValidWorkDiv(kernelCfg, devAcc, helloWorldKernel);
Expand Down
4 changes: 2 additions & 2 deletions example/helloWorldLambda/src/helloWorldLambda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ auto example(TAccTag const&) -> int
// Define the work division
using Vec = alpaka::Vec<Dim, Idx>;
auto const elementsPerThread = Vec::all(static_cast<Idx>(1));
auto const threadsPerGrid = Vec{4, 2, 4};
auto const elementsPerGrid = Vec{4, 2, 4};


size_t const nExclamationMarks = 10;
Expand Down Expand Up @@ -117,7 +117,7 @@ auto example(TAccTag const&) -> int
printf("\n");
};

alpaka::KernelCfg<Acc> const kernelCfg = {threadsPerGrid, elementsPerThread};
alpaka::KernelCfg<Acc> const kernelCfg = {elementsPerGrid, elementsPerThread};

// Let alpaka calculate good block and grid sizes given our full problem extent
auto const workDiv = alpaka::getValidWorkDiv(kernelCfg, devAcc, kernelLambda, nExclamationMarks);
Expand Down
4 changes: 2 additions & 2 deletions example/kernelSpecialization/src/kernelSpecialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,11 @@ auto example(TAccTag const&) -> int
Queue queue(devAcc);

// Define the work division
std::size_t const threadsPerGrid = 16u;
std::size_t const elementsPerGrid = 16u;
std::size_t const elementsPerThread = 1u;
Kernel kernel;

alpaka::KernelCfg<Acc> const kernelCfg = {threadsPerGrid, elementsPerThread};
alpaka::KernelCfg<Acc> const kernelCfg = {elementsPerGrid, elementsPerThread};

// Let alpaka calculate good block and grid sizes given our full problem extent
auto const workDiv = alpaka::getValidWorkDiv(kernelCfg, devAcc, kernel);
Expand Down
4 changes: 2 additions & 2 deletions example/openMPSchedule/src/openMPSchedule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,13 @@ auto main() -> int
Queue queue(devAcc);

// Define the work division
Idx const threadsPerGrid = 16u;
Idx const elementsPerGrid = 16u;
Idx const elementsPerThread = 1u;

OpenMPScheduleDefaultKernel openMPScheduleDefaultKernel;

// Let alpaka calculate good block and grid sizes given our full problem extent
alpaka::KernelCfg<Acc> kernelCfg = {threadsPerGrid, elementsPerThread};
alpaka::KernelCfg<Acc> kernelCfg = {elementsPerGrid, elementsPerThread};
auto const workDiv = alpaka::getValidWorkDiv(kernelCfg, devAcc, openMPScheduleDefaultKernel);

// Run the kernel setting no schedule explicitly.
Expand Down
20 changes: 10 additions & 10 deletions test/unit/workDiv/src/WorkDivForKernelTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,10 @@ TEMPLATE_LIST_TEST_CASE("getValidWorkDiv.1D", "[workDivKernel]", TestAccs)

// Get the device properties and hard limits
auto const props = alpaka::getAccDevProps<Acc>(dev);
Idx const threadsPerGridTestValue = props.m_blockThreadCountMax * props.m_gridBlockCountMax;
Idx const elementsPerGridTestValue = props.m_blockThreadCountMax * props.m_gridBlockCountMax;

// Test the getValidWorkDiv function for threadsPerGridTestValue threads per grid.
alpaka::KernelCfg<Acc> const kernelCfg = {Vec{threadsPerGridTestValue}, Vec{1}};
// Test the getValidWorkDiv function for elementsPerGridTestValue elements per grid.
alpaka::KernelCfg<Acc> const kernelCfg = {Vec{elementsPerGridTestValue}, Vec{1}};
auto const workDiv = alpaka::getValidWorkDiv(kernelCfg, dev, kernel, 200ul);

// Test the isValidWorkDiv function
Expand All @@ -107,7 +107,7 @@ TEMPLATE_LIST_TEST_CASE("getValidWorkDiv.1D", "[workDivKernel]", TestAccs)
CHECK(threadsPerBlock <= threadsPerBlockLimit);

// Check that using the maximum number of threads per block is valid.
auto const validWorkDiv = WorkDiv{Vec{threadsPerGridTestValue / threadsPerBlock}, Vec{threadsPerBlock}, Vec{1}};
auto const validWorkDiv = WorkDiv{Vec{elementsPerGridTestValue / threadsPerBlock}, Vec{threadsPerBlock}, Vec{1}};
CHECK(alpaka::isValidWorkDiv<Acc>(validWorkDiv, dev, kernel, 200ul));

// Check that using too many threads per block is not valid.
Expand All @@ -122,7 +122,7 @@ TEMPLATE_LIST_TEST_CASE("getValidWorkDiv.1D", "[workDivKernel]", TestAccs)
if constexpr(alpaka::isSingleThreadAcc<Acc>)
{
// Check that the compute work division uses a single thread per block.
auto const expectedWorkDiv = WorkDiv{Vec{threadsPerGridTestValue}, Vec{1}, Vec{1}};
auto const expectedWorkDiv = WorkDiv{Vec{elementsPerGridTestValue}, Vec{1}, Vec{1}};
CHECK(workDiv == expectedWorkDiv);

// Check that a work division with more than one thread per block is not valid.
Expand Down Expand Up @@ -161,10 +161,10 @@ TEMPLATE_LIST_TEST_CASE("getValidWorkDiv.2D", "[workDivKernel]", TestAccs2D)

// Get the device properties and hard limits
auto const props = alpaka::getAccDevProps<Acc>(dev);
Idx const threadsPerGridTestValue = props.m_blockThreadCountMax * props.m_gridBlockCountMax;
Idx const elementsPerGridTestValue = props.m_blockThreadCountMax * props.m_gridBlockCountMax;

// Test getValidWorkDiv function for threadsPerGridTestValue threads per grid.
alpaka::KernelCfg<Acc> const kernelCfg = {Vec{8, threadsPerGridTestValue / 8}, Vec{1, 1}};
// Test the getValidWorkDiv function for elementsPerGridTestValue elements per grid.
alpaka::KernelCfg<Acc> const kernelCfg = {Vec{8, elementsPerGridTestValue / 8}, Vec{1, 1}};
auto const workDiv = alpaka::getValidWorkDiv(kernelCfg, dev, kernel, 200ul);

// Test the isValidWorkDiv function
Expand All @@ -184,7 +184,7 @@ TEMPLATE_LIST_TEST_CASE("getValidWorkDiv.2D", "[workDivKernel]", TestAccs2D)

// Check that using the maximum number of threads per block is valid.
auto const validWorkDiv
= WorkDiv{Vec{8, threadsPerGridTestValue / threadsPerBlock / 8}, Vec{1, threadsPerBlock}, Vec{1, 1}};
= WorkDiv{Vec{8, elementsPerGridTestValue / threadsPerBlock / 8}, Vec{1, threadsPerBlock}, Vec{1, 1}};
CHECK(alpaka::isValidWorkDiv<Acc>(validWorkDiv, dev, kernel, 200ul));

// Check that using too many threads per block is not valid.
Expand All @@ -199,7 +199,7 @@ TEMPLATE_LIST_TEST_CASE("getValidWorkDiv.2D", "[workDivKernel]", TestAccs2D)
if constexpr(alpaka::isSingleThreadAcc<Acc>)
{
// Check that the compute work division uses a single thread per block.
auto const expectedWorkDiv = WorkDiv{Vec{8, threadsPerGridTestValue / 8}, Vec{1, 1}, Vec{1, 1}};
auto const expectedWorkDiv = WorkDiv{Vec{8, elementsPerGridTestValue / 8}, Vec{1, 1}, Vec{1, 1}};
CHECK(workDiv == expectedWorkDiv);

// Check that a work division with more than one thread per block is not valid.
Expand Down
Loading