diff --git a/src/dxbc/dxbc_analysis.cpp b/src/dxbc/dxbc_analysis.cpp index c9c98017..a230da97 100644 --- a/src/dxbc/dxbc_analysis.cpp +++ b/src/dxbc/dxbc_analysis.cpp @@ -34,7 +34,17 @@ namespace dxvk { m_analysis->uavInfos[registerId].accessAtomicOp = true; } } break; - + + case DxbcInstClass::TextureSample: + case DxbcInstClass::VectorDeriv: { + m_analysis->usesDerivatives = true; + } break; + + case DxbcInstClass::ControlFlow: { + if (ins.op == DxbcOpcode::Discard) + m_analysis->usesKill = true; + } break; + case DxbcInstClass::TypedUavLoad: { const uint32_t registerId = ins.src[1].idx[0].offset; m_analysis->uavInfos[registerId].accessTypedLoad = true; diff --git a/src/dxbc/dxbc_analysis.h b/src/dxbc/dxbc_analysis.h index d51ede59..5059ae84 100644 --- a/src/dxbc/dxbc_analysis.h +++ b/src/dxbc/dxbc_analysis.h @@ -37,6 +37,9 @@ namespace dxvk { DxbcClipCullInfo clipCullIn; DxbcClipCullInfo clipCullOut; + + bool usesDerivatives = false; + bool usesKill = false; }; /** diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index b3b83136..abeb1658 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -2003,9 +2003,27 @@ namespace dxvk { // (dst0) Register that receives the result // (dst1) Destination u# or g# register // (srcX) As above + const DxbcBufferInfo bufferInfo = getBufferInfo(ins.dst[ins.dstCount - 1]); + const bool isImm = ins.dstCount == 2; const bool isUav = ins.dst[ins.dstCount - 1].type == DxbcOperandType::UnorderedAccessView; + // Perform atomic operations on UAVs only if the UAV + // is bound and if there is nothing else stopping us. + DxbcConditional cond; + + if (isUav) { + uint32_t writeTest = emitUavWriteTest(bufferInfo); + + cond.labelIf = m_module.allocateId(); + cond.labelEnd = m_module.allocateId(); + + m_module.opSelectionMerge(cond.labelEnd, spv::SelectionControlMaskNone); + m_module.opBranchConditional(writeTest, cond.labelIf, cond.labelEnd); + + m_module.opLabel(cond.labelIf); + } + // Retrieve destination pointer for the atomic operation> const DxbcRegisterPointer pointer = emitGetAtomicPointer( ins.dst[ins.dstCount - 1], ins.src[0]); @@ -2126,6 +2144,12 @@ namespace dxvk { // register if this is an imm_atomic_* opcode. if (isImm) emitRegisterStore(ins.dst[0], value); + + // End conditional block + if (isUav) { + m_module.opBranch(cond.labelEnd); + m_module.opLabel (cond.labelEnd); + } } @@ -2133,12 +2157,25 @@ namespace dxvk { // imm_atomic_alloc and imm_atomic_consume have the following operands: // (dst0) The register that will hold the old counter value // (dst1) The UAV whose counter is going to be modified - // TODO check if the corresponding UAV is bound + const DxbcBufferInfo bufferInfo = getBufferInfo(ins.dst[1]); + const uint32_t registerId = ins.dst[1].idx[0].offset; if (m_uavs.at(registerId).ctrId == 0) m_uavs.at(registerId).ctrId = emitDclUavCounter(registerId); + // Only perform the operation if the UAV is bound + uint32_t writeTest = emitUavWriteTest(bufferInfo); + + DxbcConditional cond; + cond.labelIf = m_module.allocateId(); + cond.labelEnd = m_module.allocateId(); + + m_module.opSelectionMerge(cond.labelEnd, spv::SelectionControlMaskNone); + m_module.opBranchConditional(writeTest, cond.labelIf, cond.labelEnd); + + m_module.opLabel(cond.labelIf); + // Get a pointer to the atomic counter in question DxbcRegisterInfo ptrType; ptrType.type.ctype = DxbcScalarType::Uint32; @@ -2187,6 +2224,10 @@ namespace dxvk { // Store the result emitRegisterStore(ins.dst[0], value); + + // End conditional block + m_module.opBranch(cond.labelEnd); + m_module.opLabel (cond.labelEnd); } @@ -3254,23 +3295,37 @@ namespace dxvk { // (dst0) The destination UAV // (src0) The texture or buffer coordinates // (src1) The value to store - const uint32_t registerId = ins.dst[0].idx[0].offset; - const DxbcUav uavInfo = m_uavs.at(registerId); + const DxbcBufferInfo uavInfo = getBufferInfo(ins.dst[0]); + + // Execute write op only if the UAV is bound + uint32_t writeTest = emitUavWriteTest(uavInfo); + + DxbcConditional cond; + cond.labelIf = m_module.allocateId(); + cond.labelEnd = m_module.allocateId(); + + m_module.opSelectionMerge (cond.labelEnd, spv::SelectionControlMaskNone); + m_module.opBranchConditional(writeTest, cond.labelIf, cond.labelEnd); + + m_module.opLabel(cond.labelIf); // Load texture coordinates - DxbcRegisterValue texCoord = emitLoadTexCoord( - ins.src[0], uavInfo.imageInfo); + DxbcRegisterValue texCoord = emitLoadTexCoord(ins.src[0], uavInfo.image); // Load the value that will be written to the image. We'll // have to cast it to the component type of the image. const DxbcRegisterValue texValue = emitRegisterBitcast( emitRegisterLoad(ins.src[1], DxbcRegMask(true, true, true, true)), - uavInfo.sampledType); + uavInfo.stype); // Write the given value to the image m_module.opImageWrite( - m_module.opLoad(uavInfo.imageTypeId, uavInfo.varId), + m_module.opLoad(uavInfo.typeId, uavInfo.varId), texCoord.id, texValue.id, SpirvImageOperands()); + + // End conditional block + m_module.opBranch(cond.labelEnd); + m_module.opLabel (cond.labelEnd); } @@ -3610,21 +3665,26 @@ namespace dxvk { const DxbcRegisterValue zeroTest = emitRegisterZeroTest( condition, ins.controls.zeroTest()); - // Insert a Pseudo-'If' block - const uint32_t discardBlock = m_module.allocateId(); - const uint32_t mergeBlock = m_module.allocateId(); - - m_module.opSelectionMerge(mergeBlock, - spv::SelectionControlMaskNone); - - m_module.opBranchConditional( - zeroTest.id, discardBlock, mergeBlock); - - // OpKill terminates the block - m_module.opLabel(discardBlock); - m_module.opKill(); - - m_module.opLabel(mergeBlock); + if (m_ps.killState == 0) { + DxbcConditional cond; + cond.labelIf = m_module.allocateId(); + cond.labelEnd = m_module.allocateId(); + + m_module.opSelectionMerge(cond.labelIf, spv::SelectionControlMaskNone); + m_module.opBranchConditional(zeroTest.id, cond.labelIf, cond.labelEnd); + + // OpKill terminates the block + m_module.opLabel(cond.labelIf); + m_module.opKill(); + + m_module.opLabel(cond.labelEnd); + } else { + uint32_t typeId = m_module.defBoolType(); + + uint32_t killState = m_module.opLoad (typeId, m_ps.killState); + killState = m_module.opLogicalOr(typeId, killState, zeroTest.id); + m_module.opStore(m_ps.killState, killState); + } } @@ -4467,11 +4527,27 @@ namespace dxvk { // Cast source value to the expected data type value = emitRegisterBitcast(value, DxbcScalarType::Uint32); - // Shared memory is not accessed through a texel buffer view - const bool isTgsm = operand.type == DxbcOperandType::ThreadGroupSharedMemory; + // Thread Group Shared Memory is not accessed through a texel buffer view + const bool isUav = operand.type == DxbcOperandType::UnorderedAccessView; - const uint32_t bufferId = isTgsm - ? 0 : m_module.opLoad(bufferInfo.typeId, bufferInfo.varId); + // Perform UAV writes only if the UAV is bound and if there + // is nothing else preventing us from writing to it. + DxbcConditional cond; + + if (isUav) { + uint32_t writeTest = emitUavWriteTest(bufferInfo); + + cond.labelIf = m_module.allocateId(); + cond.labelEnd = m_module.allocateId(); + + m_module.opSelectionMerge(cond.labelEnd, spv::SelectionControlMaskNone); + m_module.opBranchConditional(writeTest, cond.labelIf, cond.labelEnd); + + m_module.opLabel(cond.labelIf); + } + + // Perform the actual write operation + const uint32_t bufferId = isUav ? m_module.opLoad(bufferInfo.typeId, bufferInfo.varId) : 0; const uint32_t scalarTypeId = getVectorTypeId({ DxbcScalarType::Uint32, 1 }); const uint32_t vectorTypeId = getVectorTypeId({ DxbcScalarType::Uint32, 4 }); @@ -4520,6 +4596,12 @@ namespace dxvk { srcComponentIndex += 1; } } + + // End conditional block + if (isUav) { + m_module.opBranch(cond.labelEnd); + m_module.opLabel (cond.labelEnd); + } } @@ -5473,6 +5555,21 @@ namespace dxvk { } + uint32_t DxbcCompiler::emitUavWriteTest(const DxbcBufferInfo& uav) { + uint32_t typeId = m_module.defBoolType(); + uint32_t testId = uav.specId; + + if (m_ps.killState != 0) { + uint32_t killState = m_module.opLoad(typeId, m_ps.killState); + + testId = m_module.opLogicalAnd(typeId, testId, + m_module.opLogicalNot(typeId, killState)); + } + + return testId; + } + + void DxbcCompiler::emitInit() { // Set up common capabilities for all shaders m_module.enableCapability(spv::CapabilityShader); @@ -5677,6 +5774,16 @@ namespace dxvk { spv::BuiltInCullDistance, spv::StorageClassInput); + // We may have to defer kill operations to the end of + // the shader in order to keep derivatives correct. + if (m_analysis->usesKill && m_analysis->usesDerivatives && m_options.test(DxbcOption::DeferKill)) { + m_ps.killState = m_module.newVarInit( + m_module.defPointerType(m_module.defBoolType(), spv::StorageClassPrivate), + spv::StorageClassPrivate, m_module.constBool(false)); + + m_module.setDebugName(m_ps.killState, "ps_kill"); + } + // Main function of the pixel shader m_ps.functionId = m_module.allocateId(); m_module.setDebugName(m_ps.functionId, "ps_main"); @@ -5774,9 +5881,27 @@ namespace dxvk { this->emitInputSetup(); this->emitClipCullLoad(DxbcSystemValue::ClipDistance, m_clipDistances); this->emitClipCullLoad(DxbcSystemValue::CullDistance, m_cullDistances); + m_module.opFunctionCall( m_module.defVoidType(), m_ps.functionId, 0, nullptr); + + if (m_ps.killState != 0) { + DxbcConditional cond; + cond.labelIf = m_module.allocateId(); + cond.labelEnd = m_module.allocateId(); + + uint32_t killTest = m_module.opLoad(m_module.defBoolType(), m_ps.killState); + + m_module.opSelectionMerge(cond.labelEnd, spv::SelectionControlMaskNone); + m_module.opBranchConditional(killTest, cond.labelIf, cond.labelEnd); + + m_module.opLabel(cond.labelIf); + m_module.opKill(); + + m_module.opLabel(cond.labelEnd); + } + this->emitOutputSetup(); this->emitMainFunctionEnd(); } diff --git a/src/dxbc/dxbc_compiler.h b/src/dxbc/dxbc_compiler.h index b734d4ec..38115a08 100644 --- a/src/dxbc/dxbc_compiler.h +++ b/src/dxbc/dxbc_compiler.h @@ -110,6 +110,20 @@ namespace dxvk { }; + /** + * \brief Helper struct for conditional execution + * + * Stores a set of labels required to implement either + * an if-then block or an if-then-else block. This is + * not used to implement control flow instructions. + */ + struct DxbcConditional { + uint32_t labelIf = 0; + uint32_t labelElse = 0; + uint32_t labelEnd = 0; + }; + + /** * \brief Vertex shader-specific structure */ @@ -151,6 +165,8 @@ namespace dxvk { uint32_t builtinSampleMaskIn = 0; uint32_t builtinSampleMaskOut = 0; uint32_t builtinLayer = 0; + + uint32_t killState = 0; }; @@ -940,6 +956,11 @@ namespace dxvk { DxbcSystemValue sv, uint32_t srcArray); + /////////////////////////////// + // Some state checking methods + uint32_t emitUavWriteTest( + const DxbcBufferInfo& uav); + ////////////////////////////////////// // Common function definition methods void emitInit(); diff --git a/src/dxbc/dxbc_module.cpp b/src/dxbc/dxbc_module.cpp index f70ab68e..4ac55266 100644 --- a/src/dxbc/dxbc_module.cpp +++ b/src/dxbc/dxbc_module.cpp @@ -54,13 +54,14 @@ namespace dxvk { m_isgnChunk, m_osgnChunk, analysisInfo); + this->runAnalyzer(analyzer, m_shexChunk->slice()); + DxbcCompiler compiler( fileName, options, m_shexChunk->version(), m_isgnChunk, m_osgnChunk, analysisInfo); - this->runAnalyzer(analyzer, m_shexChunk->slice()); this->runCompiler(compiler, m_shexChunk->slice()); return compiler.finalize(); diff --git a/src/dxbc/dxbc_options.cpp b/src/dxbc/dxbc_options.cpp index 6ccfd565..4a1621b8 100644 --- a/src/dxbc/dxbc_options.cpp +++ b/src/dxbc/dxbc_options.cpp @@ -35,6 +35,7 @@ namespace dxvk { if (devFeatures.shaderStorageImageReadWithoutFormat) flags.set(DxbcOption::UseStorageImageReadWithoutFormat); + flags.set(DxbcOption::DeferKill); return flags; } diff --git a/src/dxbc/dxbc_options.h b/src/dxbc/dxbc_options.h index 1a8e575c..72bedd56 100644 --- a/src/dxbc/dxbc_options.h +++ b/src/dxbc/dxbc_options.h @@ -17,6 +17,11 @@ namespace dxvk { /// Use FMin/FMax/FClamp instead of NMin/NMax/NClamp. /// Workaround for bugs in older Nvidia drivers. UseSimpleMinMaxClamp, + + /// Defer kill operation to the end of the shader. + /// Fixes derivatives that are undefined due to + /// non-uniform control flow in fragment shaders. + DeferKill, }; using DxbcOptions = Flags;