From e42ecbc2dcd821f9a64cb607af446b6da14fed07 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Sat, 11 Jan 2025 14:55:32 +0100 Subject: [PATCH] [dxbc] Improve code gen for zeroing workgroup memory --- src/dxbc/dxbc_compiler.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index fe4b7ad1aaf..6ab3e916334 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -6193,13 +6193,11 @@ namespace dxvk { uint32_t threadId = m_module.opLoad( intTypeId, m_cs.builtinLocalInvocationIndex); - - uint32_t strideId = m_module.constu32(numThreads); - uint32_t zeroId = m_module.constu32(0); + uint32_t zeroId = m_module.constu32(0); for (uint32_t e = 0; e < numElementsPerThread; e++) { uint32_t ofsId = m_module.opIAdd(intTypeId, threadId, - m_module.opIMul(intTypeId, strideId, m_module.constu32(e))); + m_module.constu32(numThreads * e)); uint32_t ptrId = m_module.opAccessChain( ptrTypeId, m_gRegs[i].varId, 1, &ofsId); @@ -6221,9 +6219,8 @@ namespace dxvk { m_module.opLabel(cond.labelIf); - uint32_t ofsId = m_module.opIAdd(intTypeId, - m_module.constu32(numThreads * numElementsPerThread), - threadId); + uint32_t ofsId = m_module.opIAdd(intTypeId, threadId, + m_module.constu32(numThreads * numElementsPerThread)); uint32_t ptrId = m_module.opAccessChain( ptrTypeId, m_gRegs[i].varId, 1, &ofsId);