From 1c44a5929d917194774d074f6860e53d095252b9 Mon Sep 17 00:00:00 2001
From: Andrea Bocci <andrea.bocci@cern.ch>
Date: Tue, 30 Jul 2024 14:23:56 +0200
Subject: [PATCH 1/3] Fix getValidWorkDivForKernel tests for the CUDA backend

---
 .../unit/workDiv/src/WorkDivForKernelTest.cpp | 93 ++++++-------------
 1 file changed, 28 insertions(+), 65 deletions(-)
diff --git a/test/unit/workDiv/src/WorkDivForKernelTest.cpp b/test/unit/workDiv/src/WorkDivForKernelTest.cpp
index 0cdc54c611db..410ce20d25f6 100644
--- a/test/unit/workDiv/src/WorkDivForKernelTest.cpp
+++ b/test/unit/workDiv/src/WorkDivForKernelTest.cpp
@@ -99,25 +99,15 @@ TEMPLATE_LIST_TEST_CASE("getValidWorkDivForKernel.1D", "[workDivKernel]", TestAc
     auto const isValid = alpaka::isValidWorkDivKernel<Acc>(dev, bundeledKernel, workDiv);
     CHECK(isValid == true);
 
-    if constexpr(alpaka::accMatchesTags<Acc, alpaka::TagGpuCudaRt>)
-    {
-        // Get calculated threads per block from the workDiv found by examining kernel function
-        auto const threadsPerBlock = workDiv.m_blockThreadExtent.prod();
-        // Get hard limits
-        auto const threadsPerBlockLimit = props.m_blockThreadCountMax;
-
-        // Depending on the GPU type or the compiler the test below might fail because threadsPerBlock can be equal to
-        // threadsPerBlockLimit, which is the max device limit.
-        CHECK(threadsPerBlock < static_cast<Idx>(threadsPerBlockLimit));
-    }
-    else if constexpr(alpaka::accMatchesTags<
-                          Acc,
-                          alpaka::TagGpuHipRt,
-                          alpaka::TagCpuThreads,
-                          alpaka::TagCpuOmp2Threads,
-                          alpaka::TagFpgaSyclIntel,
-                          alpaka::TagGpuSyclIntel,
-                          alpaka::TagGenericSycl>)
+    if constexpr(alpaka::accMatchesTags<
+                     Acc,
+                     alpaka::TagGpuCudaRt,
+                     alpaka::TagGpuHipRt,
+                     alpaka::TagCpuThreads,
+                     alpaka::TagCpuOmp2Threads,
+                     alpaka::TagFpgaSyclIntel,
+                     alpaka::TagGpuSyclIntel,
+                     alpaka::TagGenericSycl>)
     {
         // Get calculated threads per block from the workDiv found by examining kernel function
         auto const threadsPerBlock = workDiv.m_blockThreadExtent.prod();
@@ -180,59 +170,32 @@ TEMPLATE_LIST_TEST_CASE("getValidWorkDivForKernel.2D", "[workDivKernel]", TestAc
     auto const isValid = alpaka::isValidWorkDivKernel<Acc>(dev, bundeledKernel, workDiv);
     CHECK(isValid == true);
 
-    if constexpr(alpaka::accMatchesTags<Acc, alpaka::TagGpuCudaRt>)
-    {
-        // Expected valid workdiv values for this kernel might change depending on the GPU type and compiler. Therefore
-        // generated workdiv is not compared to a specific workdiv in this test.
-
-        // Get calculated threads per block from the workDiv that was found by examining kernel function
-        auto const threadsPerBlock = workDiv.m_blockThreadExtent.prod();
-        // Get hard limits
-        auto const threadsPerBlockLimit = props.m_blockThreadCountMax;
-
-        // Depending on the GPU type or the compiler the test below might fail because threadsPerBlock can be equal to
-        // threadsPerBlockLimit, which is the max device limit.
-        CHECK(threadsPerBlock < static_cast<Idx>(threadsPerBlockLimit));
-
-        // too many threads per block
-        auto const invalidWorkDiv
-            = WorkDiv{Vec{8, threadsPerGridTestValue / 8}, Vec{2 * threadsPerBlock, 1}, Vec{1, 1}};
-        auto isWorkDivValidForCuda = alpaka::isValidWorkDivKernel<Acc>(dev, bundeledKernel, invalidWorkDiv);
-        CHECK(isWorkDivValidForCuda == false);
-
-        auto const validWorkDiv = WorkDiv{Vec{8, threadsPerGridTestValue / 8}, Vec{1, threadsPerBlock}, Vec{1, 1}};
-        isWorkDivValidForCuda = alpaka::isValidWorkDivKernel<Acc>(dev, bundeledKernel, validWorkDiv);
-        CHECK(isWorkDivValidForCuda == true);
-    }
-    else if constexpr(alpaka::accMatchesTags<
-                          Acc,
-                          alpaka::TagGpuHipRt,
-                          alpaka::TagCpuThreads,
-                          alpaka::TagCpuOmp2Threads,
-                          alpaka::TagFpgaSyclIntel,
-                          alpaka::TagGpuSyclIntel,
-                          alpaka::TagGenericSycl>)
+    // The valid workdiv values for this kernel might change depending on the GPU type and compiler.
+    // Therefore the generated workdiv is not compared to a specific workdiv in this test.
+    if constexpr(alpaka::accMatchesTags<
+                     Acc,
+                     alpaka::TagGpuCudaRt,
+                     alpaka::TagGpuHipRt,
+                     alpaka::TagCpuThreads,
+                     alpaka::TagCpuOmp2Threads,
+                     alpaka::TagFpgaSyclIntel,
+                     alpaka::TagGpuSyclIntel,
+                     alpaka::TagGenericSycl>)
     {
         // Get calculated threads per block from the workDiv that was found by examining the kernel function
         auto const threadsPerBlock = workDiv.m_blockThreadExtent.prod();
         // Get hard limits
         auto const threadsPerBlockLimit = props.m_blockThreadCountMax;
-        // Depending on the GPU type or the compiler this test might fail because threadsPerBlock can be less than
-        // threadsPerBlockLimit, which is the max device limit.
-        if(threadsPerBlockLimit == 1)
-            CHECK(threadsPerBlock == static_cast<Idx>(threadsPerBlockLimit));
-        else
-            CHECK(threadsPerBlock < static_cast<Idx>(threadsPerBlockLimit));
-
-        // too many threads per block
-        auto const invalidWorkDiv
-            = WorkDiv{Vec{8, threadsPerGridTestValue / 8}, Vec{20 * threadsPerBlock, 1}, Vec{1, 1}};
-        auto isWorkDivValidForHip = alpaka::isValidWorkDivKernel<Acc>(dev, bundeledKernel, invalidWorkDiv);
-        CHECK(isWorkDivValidForHip == false);
+        // Check that the number of threads per block is within the device limit.
+        CHECK(threadsPerBlock <= static_cast<Idx>(threadsPerBlockLimit));
 
+        // Check that using the maximum number of threads per block is valid.
         auto const validWorkDiv = WorkDiv{Vec{8, threadsPerGridTestValue / 8}, Vec{1, threadsPerBlock}, Vec{1, 1}};
-        isWorkDivValidForHip = alpaka::isValidWorkDivKernel<Acc>(dev, bundeledKernel, validWorkDiv);
-        CHECK(isWorkDivValidForHip == true);
+        CHECK(alpaka::isValidWorkDivKernel<Acc>(dev, bundeledKernel, validWorkDiv));
+
+        // Check that using too many threads per block is not valid.
+        auto const invalidWorkDiv = WorkDiv{Vec{8, threadsPerGridTestValue / 8}, Vec{20, threadsPerBlock}, Vec{1, 1}};
+        CHECK(not alpaka::isValidWorkDivKernel<Acc>(dev, bundeledKernel, invalidWorkDiv));
     }
     else if constexpr(alpaka::accMatchesTags<
                           Acc,

From a7fd020c23c473c1006ddaecc4555ebe3d822dba Mon Sep 17 00:00:00 2001
From: Andrea Bocci <andrea.bocci@cern.ch>
Date: Tue, 30 Jul 2024 14:41:43 +0200
Subject: [PATCH 2/3] Fix getValidWorkDivForKernel tests for the SYCL CPU
 backend

---
 test/unit/workDiv/src/WorkDivForKernelTest.cpp | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/test/unit/workDiv/src/WorkDivForKernelTest.cpp b/test/unit/workDiv/src/WorkDivForKernelTest.cpp
index 410ce20d25f6..64144fc317af 100644
--- a/test/unit/workDiv/src/WorkDivForKernelTest.cpp
+++ b/test/unit/workDiv/src/WorkDivForKernelTest.cpp
@@ -105,6 +105,7 @@ TEMPLATE_LIST_TEST_CASE("getValidWorkDivForKernel.1D", "[workDivKernel]", TestAc
                      alpaka::TagGpuHipRt,
                      alpaka::TagCpuThreads,
                      alpaka::TagCpuOmp2Threads,
+                     alpaka::TagCpuSycl,
                      alpaka::TagFpgaSyclIntel,
                      alpaka::TagGpuSyclIntel,
                      alpaka::TagGenericSycl>)
@@ -116,12 +117,8 @@ TEMPLATE_LIST_TEST_CASE("getValidWorkDivForKernel.1D", "[workDivKernel]", TestAc
 
         CHECK(threadsPerBlock <= static_cast<Idx>(threadsPerBlockLimit));
     }
-    else if constexpr(alpaka::accMatchesTags<
-                          Acc,
-                          alpaka::TagCpuSerial,
-                          alpaka::TagCpuOmp2Blocks,
-                          alpaka::TagCpuTbbBlocks,
-                          alpaka::TagCpuSycl>)
+    else if constexpr(alpaka::
+                          accMatchesTags<Acc, alpaka::TagCpuSerial, alpaka::TagCpuOmp2Blocks, alpaka::TagCpuTbbBlocks>)
     {
         // CPU must have only 1 thread per block. In other words, number of blocks is equal to number of threads.
         CHECK(workDiv == WorkDiv{Vec{threadsPerGridTestValue}, Vec{1}, Vec{1}});
@@ -178,6 +175,7 @@ TEMPLATE_LIST_TEST_CASE("getValidWorkDivForKernel.2D", "[workDivKernel]", TestAc
                      alpaka::TagGpuHipRt,
                      alpaka::TagCpuThreads,
                      alpaka::TagCpuOmp2Threads,
+                     alpaka::TagCpuSycl,
                      alpaka::TagFpgaSyclIntel,
                      alpaka::TagGpuSyclIntel,
                      alpaka::TagGenericSycl>)
@@ -197,12 +195,8 @@ TEMPLATE_LIST_TEST_CASE("getValidWorkDivForKernel.2D", "[workDivKernel]", TestAc
         auto const invalidWorkDiv = WorkDiv{Vec{8, threadsPerGridTestValue / 8}, Vec{20, threadsPerBlock}, Vec{1, 1}};
         CHECK(not alpaka::isValidWorkDivKernel<Acc>(dev, bundeledKernel, invalidWorkDiv));
     }
-    else if constexpr(alpaka::accMatchesTags<
-                          Acc,
-                          alpaka::TagCpuSerial,
-                          alpaka::TagCpuOmp2Blocks,
-                          alpaka::TagCpuTbbBlocks,
-                          alpaka::TagCpuSycl>)
+    else if constexpr(alpaka::
+                          accMatchesTags<Acc, alpaka::TagCpuSerial, alpaka::TagCpuOmp2Blocks, alpaka::TagCpuTbbBlocks>)
     {
         // CPU must have only 1 thread per block. In other words, number of blocks is equal to number of threads.
         CHECK(workDiv == WorkDiv{Vec{8, threadsPerGridTestValue / 8}, Vec{1, 1}, Vec{1, 1}});

From 6206326fa31f8ce4e4054c639bd8638c538c6873 Mon Sep 17 00:00:00 2001
From: Andrea Bocci <andrea.bocci@cern.ch>
Date: Tue, 30 Jul 2024 19:52:13 +0200
Subject: [PATCH 3/3] Rewrite the getValidWorkDivForKernel tests

---
 .../unit/workDiv/src/WorkDivForKernelTest.cpp | 185 +++++++++---------
 1 file changed, 97 insertions(+), 88 deletions(-)

diff --git a/test/unit/workDiv/src/WorkDivForKernelTest.cpp b/test/unit/workDiv/src/WorkDivForKernelTest.cpp
index 64144fc317af..f6de4a020776 100644
--- a/test/unit/workDiv/src/WorkDivForKernelTest.cpp
+++ b/test/unit/workDiv/src/WorkDivForKernelTest.cpp
@@ -69,7 +69,7 @@ struct TestKernelWithManyRegisters
         double sum = var0 + var1 + var2 + var3 + var4 + var5 + var6 + var7 + var8 + var9 + var10 + var11 + var12
                      + var13 + var14 + var15 + var16 + var17 + var18 + var19 + var20 + var21 + var22 + var23 + var24
                      + var25 + var26 + var27 + var28 + var29 + var30 + var31 + var32 + var33 + var34 + var35;
-        printf("The sum is %5.2f, the argument is %lu ", sum, val);
+        printf("The sum is %5.2f, the argument is %lu\n", sum, val);
     }
 };
 
@@ -86,55 +86,64 @@ TEMPLATE_LIST_TEST_CASE("getValidWorkDivForKernel.1D", "[workDivKernel]", TestAc
     auto const dev = alpaka::getDevByIdx(platform, 0);
 
     TestKernelWithManyRegisters kernel;
-    auto const bundeledKernel = alpaka::KernelBundle(kernel, 200ul);
+    auto const kernelBundle = alpaka::KernelBundle(kernel, 200ul);
 
-    // Get hard limits for test
-    auto const props = alpaka::getAccDevProps<Acc, decltype(dev)>(dev);
+    // Get the device properties and hard limits
+    auto const props = alpaka::getAccDevProps<Acc>(dev);
     Idx const threadsPerGridTestValue = props.m_blockThreadCountMax * props.m_gridBlockCountMax;
 
-    // Test getValidWorkDivForKernel for threadsPerGridTestValue threads per grid
+    // Test the getValidWorkDivForKernel function for threadsPerGridTestValue threads per grid.
     auto const workDiv
-        = alpaka::getValidWorkDivForKernel<Acc>(dev, bundeledKernel, Vec{threadsPerGridTestValue}, Vec{1});
-    // Test validity
-    auto const isValid = alpaka::isValidWorkDivKernel<Acc>(dev, bundeledKernel, workDiv);
-    CHECK(isValid == true);
+        = alpaka::getValidWorkDivForKernel<Acc>(dev, kernelBundle, Vec{threadsPerGridTestValue}, Vec{1});
+
+    // Test the isValidWorkDivKernel function
+    CHECK(alpaka::isValidWorkDivKernel<Acc>(dev, kernelBundle, workDiv));
+
+    // Get calculated threads per block from the workDiv that was found by examining the kernel function.
+    Idx const threadsPerBlock = workDiv.m_blockThreadExtent.prod();
+
+    // Get the device limit.
+    Idx const threadsPerBlockLimit = props.m_blockThreadCountMax;
+
+    // Check that the number of threads per block is within the device limit.
+    CHECK(threadsPerBlock <= threadsPerBlockLimit);
+
+    // Check that the number of threads per block computed for this kernel is valid.
+    auto const validWorkDiv = WorkDiv{Vec{threadsPerGridTestValue / threadsPerBlock}, Vec{threadsPerBlock}, Vec{1}};
+    CHECK(alpaka::isValidWorkDivKernel<Acc>(dev, kernelBundle, validWorkDiv));
+
+    // Check that using too many threads per block is not valid.
+    auto const invalidThreads = WorkDiv{Vec{1}, Vec{2 * threadsPerBlockLimit}, Vec{1}};
+    CHECK(not alpaka::isValidWorkDivKernel<Acc>(dev, kernelBundle, invalidThreads));
+
+    // Check that a work division with a single block, thread and element is always valid
+    auto const serialWorkDiv = WorkDiv{Vec{1}, Vec{1}, Vec{1}};
+    CHECK(alpaka::isValidWorkDivKernel<Acc>(dev, kernelBundle, serialWorkDiv));
 
+    // Some accelerators support only one thread per block:
+    if constexpr(alpaka::isSingleThreadAcc<Acc>)
+    {
+        // Check that the computed work division uses a single thread per block.
+        auto const expectedWorkDiv = WorkDiv{Vec{threadsPerGridTestValue}, Vec{1}, Vec{1}};
+        CHECK(workDiv == expectedWorkDiv);
+
+        // Check that a work division with more than one thread per block is not valid.
+        auto const parallelWorkDiv = WorkDiv{Vec{1}, Vec{2}, Vec{1}};
+        CHECK(not alpaka::isValidWorkDivKernel<Acc>(dev, kernelBundle, parallelWorkDiv));
+    }
+
+    // Check the maxDynamicSharedSizeBytes for CPU backends
     if constexpr(alpaka::accMatchesTags<
                      Acc,
-                     alpaka::TagGpuCudaRt,
-                     alpaka::TagGpuHipRt,
+                     alpaka::TagCpuSerial,
                      alpaka::TagCpuThreads,
+                     alpaka::TagCpuOmp2Blocks,
                      alpaka::TagCpuOmp2Threads,
-                     alpaka::TagCpuSycl,
-                     alpaka::TagFpgaSyclIntel,
-                     alpaka::TagGpuSyclIntel,
-                     alpaka::TagGenericSycl>)
-    {
-        // Get calculated threads per block from the workDiv found by examining kernel function
-        auto const threadsPerBlock = workDiv.m_blockThreadExtent.prod();
-        // Get hard limits
-        auto const threadsPerBlockLimit = props.m_blockThreadCountMax;
-
-        CHECK(threadsPerBlock <= static_cast<Idx>(threadsPerBlockLimit));
-    }
-    else if constexpr(alpaka::
-                          accMatchesTags<Acc, alpaka::TagCpuSerial, alpaka::TagCpuOmp2Blocks, alpaka::TagCpuTbbBlocks>)
-    {
-        // CPU must have only 1 thread per block. In other words, number of blocks is equal to number of threads.
-        CHECK(workDiv == WorkDiv{Vec{threadsPerGridTestValue}, Vec{1}, Vec{1}});
-        // Test a new 1D workdiv. Threads per block can not be larger than 1 for CPU. Hence 2 is not valid.
-        auto const workDiv1DUsingInitList = WorkDiv{Vec{threadsPerGridTestValue / 2}, Vec{2}, Vec{1}};
-        auto const isWorkDivValidForCPU
-            = alpaka::isValidWorkDivKernel<Acc>(dev, bundeledKernel, workDiv1DUsingInitList);
-        CHECK(isWorkDivValidForCPU == false);
-        // Check maxDynamicSharedSizeBytes for CPU backends
-        auto const funcAttributes = alpaka::getFunctionAttributes<Acc>(dev, bundeledKernel);
-        CHECK(
-            funcAttributes.maxDynamicSharedSizeBytes == static_cast<int>(alpaka::BlockSharedDynMemberAllocKiB * 1024));
-    }
-    else
+                     alpaka::TagCpuTbbBlocks>)
     {
-        throw std::invalid_argument("Acc type is not among tested Accs.");
+        int const maxDynamicSharedSizeBytes
+            = alpaka::getFunctionAttributes<Acc>(dev, kernelBundle).maxDynamicSharedSizeBytes;
+        CHECK(maxDynamicSharedSizeBytes == static_cast<int>(alpaka::BlockSharedDynMemberAllocKiB * 1024));
     }
 }
 
@@ -151,67 +160,67 @@ TEMPLATE_LIST_TEST_CASE("getValidWorkDivForKernel.2D", "[workDivKernel]", TestAc
     auto const dev = alpaka::getDevByIdx(platform, 0);
 
     TestKernelWithManyRegisters kernel;
-    // A random value
-    size_t val(200ul);
-    auto const bundeledKernel = alpaka::KernelBundle(kernel, val);
+    auto const kernelBundle = alpaka::KernelBundle(kernel, 200ul);
 
-    // Get hard limits for test
+    // Get the device properties and hard limits
     auto const props = alpaka::getAccDevProps<Acc>(dev);
     Idx const threadsPerGridTestValue = props.m_blockThreadCountMax * props.m_gridBlockCountMax;
 
     // Test getValidWorkDivForKernel function for threadsPerGridTestValue threads per grid.
     auto const workDiv
-        = alpaka::getValidWorkDivForKernel<Acc>(dev, bundeledKernel, Vec{8, threadsPerGridTestValue / 8}, Vec{1, 1});
+        = alpaka::getValidWorkDivForKernel<Acc>(dev, kernelBundle, Vec{8, threadsPerGridTestValue / 8}, Vec{1, 1});
 
-    // Test isValidWorkDivKernel function
-    auto const isValid = alpaka::isValidWorkDivKernel<Acc>(dev, bundeledKernel, workDiv);
-    CHECK(isValid == true);
+    // Test the isValidWorkDivKernel function
+    CHECK(alpaka::isValidWorkDivKernel<Acc>(dev, kernelBundle, workDiv));
 
-    // The valid workdiv values for this kernel might change depending on the GPU type and compiler.
+    // The valid workdiv values for the kernel may change depending on the GPU type and compiler.
     // Therefore the generated workdiv is not compared to a specific workdiv in this test.
+
+    // Get calculated threads per block from the workDiv that was found by examining the kernel function.
+    Idx const threadsPerBlock = workDiv.m_blockThreadExtent.prod();
+
+    // Get the device limit.
+    Idx const threadsPerBlockLimit = props.m_blockThreadCountMax;
+
+    // Check that the number of threads per block is within the device limit.
+    CHECK(threadsPerBlock <= threadsPerBlockLimit);
+
+    // Check that the number of threads per block computed for this kernel is valid.
+    auto const validWorkDiv
+        = WorkDiv{Vec{8, threadsPerGridTestValue / threadsPerBlock / 8}, Vec{1, threadsPerBlock}, Vec{1, 1}};
+    CHECK(alpaka::isValidWorkDivKernel<Acc>(dev, kernelBundle, validWorkDiv));
+
+    // Check that using too many threads per block is not valid.
+    auto const invalidThreads = WorkDiv{Vec{1, 1}, Vec{2, threadsPerBlockLimit}, Vec{1, 1}};
+    CHECK(not alpaka::isValidWorkDivKernel<Acc>(dev, kernelBundle, invalidThreads));
+
+    // Check that a work division with a single block, thread and element is always valid
+    auto const serialWorkDiv = WorkDiv{Vec{1, 1}, Vec{1, 1}, Vec{1, 1}};
+    CHECK(alpaka::isValidWorkDivKernel<Acc>(dev, kernelBundle, serialWorkDiv));
+
+    // Some accelerators support only one thread per block:
+    if constexpr(alpaka::isSingleThreadAcc<Acc>)
+    {
+        // Check that the computed work division uses a single thread per block.
+        auto const expectedWorkDiv = WorkDiv{Vec{8, threadsPerGridTestValue / 8}, Vec{1, 1}, Vec{1, 1}};
+        CHECK(workDiv == expectedWorkDiv);
+
+        // Check that a work division with more than one thread per block is not valid.
+        auto const parallelWorkDiv = WorkDiv{Vec{1, 1}, Vec{1, 2}, Vec{1, 1}};
+        CHECK(not alpaka::isValidWorkDivKernel<Acc>(dev, kernelBundle, parallelWorkDiv));
+    }
+
+    // Check the maxDynamicSharedSizeBytes for CPU backends
     if constexpr(alpaka::accMatchesTags<
                      Acc,
-                     alpaka::TagGpuCudaRt,
-                     alpaka::TagGpuHipRt,
+                     alpaka::TagCpuSerial,
                      alpaka::TagCpuThreads,
+                     alpaka::TagCpuOmp2Blocks,
                      alpaka::TagCpuOmp2Threads,
-                     alpaka::TagCpuSycl,
-                     alpaka::TagFpgaSyclIntel,
-                     alpaka::TagGpuSyclIntel,
-                     alpaka::TagGenericSycl>)
-    {
-        // Get calculated threads per block from the workDiv that was found by examining the kernel function
-        auto const threadsPerBlock = workDiv.m_blockThreadExtent.prod();
-        // Get hard limits
-        auto const threadsPerBlockLimit = props.m_blockThreadCountMax;
-        // Check that the number of threads per block is within the device limit.
-        CHECK(threadsPerBlock <= static_cast<Idx>(threadsPerBlockLimit));
-
-        // Check that using the maximum number of threads per block is valid.
-        auto const validWorkDiv = WorkDiv{Vec{8, threadsPerGridTestValue / 8}, Vec{1, threadsPerBlock}, Vec{1, 1}};
-        CHECK(alpaka::isValidWorkDivKernel<Acc>(dev, bundeledKernel, validWorkDiv));
-
-        // Check that using too many threads per block is not valid.
-        auto const invalidWorkDiv = WorkDiv{Vec{8, threadsPerGridTestValue / 8}, Vec{20, threadsPerBlock}, Vec{1, 1}};
-        CHECK(not alpaka::isValidWorkDivKernel<Acc>(dev, bundeledKernel, invalidWorkDiv));
-    }
-    else if constexpr(alpaka::
-                          accMatchesTags<Acc, alpaka::TagCpuSerial, alpaka::TagCpuOmp2Blocks, alpaka::TagCpuTbbBlocks>)
-    {
-        // CPU must have only 1 thread per block. In other words, number of blocks is equal to number of threads.
-        CHECK(workDiv == WorkDiv{Vec{8, threadsPerGridTestValue / 8}, Vec{1, 1}, Vec{1, 1}});
-        // Test a new 2D workdiv. Threads per block can not be larger than 1 for CPU. Hence 2x1 threads is not valid.
-        auto const invalidWorkDiv2D = WorkDiv{Vec{1, 2048}, Vec{1, 2}, Vec{1, 1}};
-        auto const isWorkDivValidForCpu = alpaka::isValidWorkDivKernel<Acc>(dev, bundeledKernel, invalidWorkDiv2D);
-        CHECK(isWorkDivValidForCpu == false);
-
-        // Check maxDynamicSharedSizeBytes for CPU backends
-        CHECK(
-            alpaka::getFunctionAttributes<Acc>(dev, bundeledKernel).maxDynamicSharedSizeBytes
-            == static_cast<int>(alpaka::BlockSharedDynMemberAllocKiB * 1024));
-    }
-    else
+                     alpaka::TagCpuTbbBlocks>)
     {
-        throw std::invalid_argument("Acc type is not among tested Accs.");
+        int const maxDynamicSharedSizeBytes
+            = alpaka::getFunctionAttributes<Acc>(dev, kernelBundle).maxDynamicSharedSizeBytes;
+        CHECK(maxDynamicSharedSizeBytes == static_cast<int>(alpaka::BlockSharedDynMemberAllocKiB * 1024));
     }
 }