From 228d3d66a77b416388748815a1d8d63154f11763 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Sat, 11 Jan 2025 11:19:51 +0100 Subject: [PATCH 1/2] [dxbc] Bound-check dynamically indexed input registers Halo MCC reads undefined PS inputs otherwise. --- src/dxbc/dxbc_compiler.cpp | 52 +++++++++++++++++++++++++++++++++----- src/dxbc/dxbc_compiler.h | 17 ++++++++++++- 2 files changed, 61 insertions(+), 8 deletions(-) diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index 685d3b44bd2..fe4b7ad1aaf 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -281,7 +281,7 @@ namespace dxvk { return this->emitDclGlobalFlags(ins); case DxbcOpcode::DclIndexRange: - return; // not needed for anything + return this->emitDclIndexRange(ins); case DxbcOpcode::DclTemps: return this->emitDclTemps(ins); @@ -375,6 +375,21 @@ namespace dxvk { } + void DxbcCompiler::emitDclIndexRange(const DxbcShaderInstruction& ins) { + // dcl_index_range has two operands: + // (dst0) Range start, either an input or output register + // (imm0) Number of registers in the range + uint32_t index = ins.dst[0].idxDim - 1u; + + DxbcIndexRange range = { }; + range.type = ins.dst[0].type; + range.start = ins.dst[0].idx[index].offset; + range.length = ins.imm[0].u32; + + m_indexRanges.push_back(range); + } + + void DxbcCompiler::emitDclTemps(const DxbcShaderInstruction& ins) { // dcl_temps has one operand: // (imm0) Number of temp registers @@ -5737,14 +5752,37 @@ namespace dxvk { DxbcRegisterValue DxbcCompiler::emitRegisterLoadRaw( const DxbcRegister& reg) { - if (reg.type == DxbcOperandType::IndexableTemp) { - bool doBoundsCheck = reg.idx[1].relReg != nullptr; - DxbcRegisterValue vectorId = emitIndexLoad(reg.idx[1]); + // Try to find index range for the given register + const DxbcIndexRange* indexRange = nullptr; + + if (reg.idxDim && reg.idx[reg.idxDim - 1u].relReg) { + uint32_t offset = reg.idx[reg.idxDim - 1u].offset; + + for (const auto& range : m_indexRanges) { + if (reg.type == range.type && 
offset >= range.start && offset < range.start + range.length) + indexRange = &range; + } + } + + if (reg.type == DxbcOperandType::IndexableTemp || indexRange) { + bool doBoundsCheck = reg.idx[reg.idxDim - 1u].relReg != nullptr; if (doBoundsCheck) { - uint32_t boundsCheck = m_module.opULessThan( - m_module.defBoolType(), vectorId.id, - m_module.constu32(m_xRegs.at(reg.idx[0].offset).alength)); + DxbcRegisterValue indexId = emitIndexLoad(reg.idx[reg.idxDim - 1u]); + uint32_t boundsCheck = 0u; + + if (reg.type == DxbcOperandType::IndexableTemp) { + boundsCheck = m_module.opULessThan( + m_module.defBoolType(), indexId.id, + m_module.constu32(m_xRegs.at(reg.idx[0].offset).alength)); + } else { + uint32_t adjustedId = m_module.opISub(getVectorTypeId(indexId.type), + indexId.id, m_module.consti32(indexRange->start)); + + boundsCheck = m_module.opULessThan( + m_module.defBoolType(), adjustedId, + m_module.constu32(indexRange->length)); + } // Kind of ugly to have an empty else block here but there's no // way for us to know the current block ID for the phi below diff --git a/src/dxbc/dxbc_compiler.h b/src/dxbc/dxbc_compiler.h index 935c683d7fd..43457188877 100644 --- a/src/dxbc/dxbc_compiler.h +++ b/src/dxbc/dxbc_compiler.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include "../spirv/spirv_module.h" @@ -135,6 +136,13 @@ namespace dxvk { uint32_t component = 0; }; + + struct DxbcIndexRange { + DxbcOperandType type; + uint32_t start; + uint32_t length; + }; + /** * \brief Vertex shader-specific structure @@ -445,6 +453,10 @@ namespace dxvk { // xfb output registers for geometry shaders std::vector m_xfbVars; + ///////////////////////////////////////////// + // Dynamically indexed input and output regs + std::vector<DxbcIndexRange> m_indexRanges = { }; + ////////////////////////////////////////////////////// // Shader resource variables. These provide access to // constant buffers, samplers, textures, and UAVs. 
@@ -473,7 +485,7 @@ namespace dxvk { uint32_t m_vArrayLengthId = 0; uint32_t m_vArray = 0; - + //////////////////////////////////////////////////// // Per-vertex input and output blocks. Depending on // the shader stage, these may be declared as arrays. @@ -546,6 +558,9 @@ namespace dxvk { void emitDclGlobalFlags( const DxbcShaderInstruction& ins); + void emitDclIndexRange( + const DxbcShaderInstruction& ins); + void emitDclTemps( const DxbcShaderInstruction& ins); From e42ecbc2dcd821f9a64cb607af446b6da14fed07 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Sat, 11 Jan 2025 14:55:32 +0100 Subject: [PATCH 2/2] [dxbc] Improve code gen for zeroing workgroup memory --- src/dxbc/dxbc_compiler.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index fe4b7ad1aaf..6ab3e916334 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -6193,13 +6193,11 @@ namespace dxvk { uint32_t threadId = m_module.opLoad( intTypeId, m_cs.builtinLocalInvocationIndex); - - uint32_t strideId = m_module.constu32(numThreads); - uint32_t zeroId = m_module.constu32(0); + uint32_t zeroId = m_module.constu32(0); for (uint32_t e = 0; e < numElementsPerThread; e++) { uint32_t ofsId = m_module.opIAdd(intTypeId, threadId, - m_module.opIMul(intTypeId, strideId, m_module.constu32(e))); + m_module.constu32(numThreads * e)); uint32_t ptrId = m_module.opAccessChain( ptrTypeId, m_gRegs[i].varId, 1, &ofsId); @@ -6221,9 +6219,8 @@ namespace dxvk { m_module.opLabel(cond.labelIf); - uint32_t ofsId = m_module.opIAdd(intTypeId, - m_module.constu32(numThreads * numElementsPerThread), - threadId); + uint32_t ofsId = m_module.opIAdd(intTypeId, threadId, + m_module.constu32(numThreads * numElementsPerThread)); uint32_t ptrId = m_module.opAccessChain( ptrTypeId, m_gRegs[i].varId, 1, &ofsId);