diff --git a/src/d3d9/d3d9_constant_set.h b/src/d3d9/d3d9_constant_set.h index 16b4e7e9..210df885 100644 --- a/src/d3d9/d3d9_constant_set.h +++ b/src/d3d9/d3d9_constant_set.h @@ -38,7 +38,14 @@ namespace dxvk { uint32_t bConsts[1]; }; + struct D3D9SwvpConstantBuffers { /* one buffer per SWVP vertex shader constant type */ + Rc floatBuffer; + Rc intBuffer; + Rc boolBuffer; + }; + struct D3D9ConstantSets { + D3D9SwvpConstantBuffers swvpBuffers; Rc buffer; DxsoShaderMetaInfo meta = {}; bool dirty = true; diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp index 6a27b1de..12dafff8 100644 --- a/src/d3d9/d3d9_device.cpp +++ b/src/d3d9/d3d9_device.cpp @@ -77,6 +77,27 @@ namespace dxvk { m_dxsoOptions = DxsoOptions(this, m_d3d9Options); + const bool supportsRobustness2 = m_dxvkDevice->features().extRobustness2.robustBufferAccess2; + bool useRobustConstantAccess = canSWVP && supportsRobustness2; + if (useRobustConstantAccess) { + m_robustSSBOAlignment = m_dxvkDevice->properties().extRobustness2.robustStorageBufferAccessSizeAlignment; + m_robustUBOAlignment = m_dxvkDevice->properties().extRobustness2.robustUniformBufferAccessSizeAlignment; + const uint32_t floatBufferAlignment = m_dxsoOptions.vertexFloatConstantBufferAsSSBO ? 
m_robustSSBOAlignment : m_robustUBOAlignment; + useRobustConstantAccess &= m_vsLayout.floatSize() % floatBufferAlignment == 0; + useRobustConstantAccess &= m_vsLayout.intSize() % m_robustUBOAlignment == 0; + useRobustConstantAccess &= m_vsLayout.bitmaskSize() % m_robustUBOAlignment == 0; + } + + if (!useRobustConstantAccess) { + m_vsFloatConstsCount = m_vsLayout.floatCount; + m_vsIntConstsCount = m_vsLayout.intCount; + m_vsBoolConstsCount = m_vsLayout.boolCount; + + if (supportsRobustness2 && canSWVP) { + Logger::warn("Disabling robust constant buffer access because of alignment."); + } + } + CreateConstantBuffers(); m_availableMemory = DetermineInitialTextureMemory(); @@ -4820,17 +4841,19 @@ namespace dxvk { void D3D9DeviceEx::CreateConstantBuffers() { - m_consts[DxsoProgramTypes::VertexShader].buffer = - CreateConstantBuffer(m_dxsoOptions.vertexConstantBufferAsSSBO, - m_vsLayout.totalSize(), - DxsoProgramType::VertexShader, - DxsoConstantBuffers::VSConstantBuffer); - + if (!m_isSWVP) { + m_consts[DxsoProgramTypes::VertexShader].buffer = + CreateConstantBuffer(false, + m_vsLayout.totalSize(), + DxsoProgramType::VertexShader, + DxsoConstantBuffers::VSConstantBuffer); + } + // SWVP constant buffers are created late based on the amount of constants set by the application m_consts[DxsoProgramTypes::PixelShader].buffer = CreateConstantBuffer(false, - m_psLayout.totalSize(), - DxsoProgramType::PixelShader, - DxsoConstantBuffers::PSConstantBuffer); + m_psLayout.totalSize(), + DxsoProgramType::PixelShader, + DxsoConstantBuffers::PSConstantBuffer); m_vsClipPlanes = CreateConstantBuffer(false, @@ -4866,36 +4889,72 @@ namespace dxvk { } - template - inline void D3D9DeviceEx::UploadHardwareConstantSet(void* pData, const SoftwareLayoutType& Src, const ShaderType& Shader) { - const D3D9ConstantSets& constSet = m_consts[ShaderStage]; + template + inline void D3D9DeviceEx::UploadSoftwareConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout) { + /* + * SWVP 
raises the amount of constants by a lot. + * To avoid copying huge amounts of data for every draw call, + * we track the highest set constant and only use a buffer big enough + * to fit that. We rely on robustness to return 0 for OOB reads. + */ - auto* dst = reinterpret_cast(pData); + D3D9ConstantSets& constSet = m_consts[DxsoProgramType::VertexShader]; - if (constSet.meta.maxConstIndexF) - std::memcpy(dst->fConsts, Src.fConsts, constSet.meta.maxConstIndexF * sizeof(Vector4)); - if (constSet.meta.maxConstIndexI) - std::memcpy(dst->iConsts, Src.iConsts, constSet.meta.maxConstIndexI * sizeof(Vector4i)); + if (!constSet.dirty) + return; + + constSet.dirty = false; + + const uint32_t floatDataSize = std::min(constSet.meta.maxConstIndexF, m_vsFloatConstsCount) * sizeof(Vector4); + const uint32_t intDataSize = std::min(constSet.meta.maxConstIndexI, m_vsIntConstsCount) * sizeof(Vector4i); + const uint32_t boolDataSize = divCeil(std::min(constSet.meta.maxConstIndexB, m_vsBoolConstsCount), 32u) * uint32_t(sizeof(uint32_t)); + + Rc& floatBuffer = constSet.swvpBuffers.floatBuffer; + // Max copy source size is 8192 * 16 => always aligned to any plausible value + // => we won't copy out of bounds + if (likely(constSet.meta.maxConstIndexF != 0 || floatBuffer == nullptr)) { + CopySoftwareConstants(DxsoConstantBuffers::VSFloatConstantBuffer, floatBuffer, Src.fConsts, floatDataSize, m_dxsoOptions.vertexFloatConstantBufferAsSSBO); + } + + Rc& intBuffer = constSet.swvpBuffers.intBuffer; + // Max copy source size is 2048 * 16 => always aligned to any plausible value + // => we won't copy out of bounds + if (likely(constSet.meta.maxConstIndexI != 0 || intBuffer == nullptr)) { + CopySoftwareConstants(DxsoConstantBuffers::VSIntConstantBuffer, intBuffer, Src.iConsts, intDataSize, false); + } + + Rc& boolBuffer = constSet.swvpBuffers.boolBuffer; + if (likely(constSet.meta.maxConstIndexB != 0 || boolBuffer == nullptr)) { + CopySoftwareConstants(DxsoConstantBuffers::VSBoolConstantBuffer, 
boolBuffer, Src.bConsts, boolDataSize, false); + } } - template - inline void D3D9DeviceEx::UploadSoftwareConstantSet(void* pData, const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader) { - const D3D9ConstantSets& constSet = m_consts[DxsoProgramType::VertexShader]; + inline void D3D9DeviceEx::CopySoftwareConstants(DxsoConstantBuffers cBufferTarget, Rc& dstBuffer, const void* src, uint32_t size, bool useSSBO) { /* copies the padded size from src into dstBuffer; dstBuffer is (re)created whenever its size differs from the padded size */ + uint32_t minSize = useSSBO ? m_robustSSBOAlignment : m_robustUBOAlignment; + minSize = std::max(minSize, 64u); + size = divCeil(std::max(size, minSize), minSize) * minSize; /* round up to a multiple of the robust access alignment: bounds checks work at that granularity, so an unaligned size would leave the tail of the checked range outside the buffer */ + + DxvkBufferSliceHandle slice; + if (unlikely(dstBuffer == nullptr || dstBuffer->info().size != size)) { + dstBuffer = CreateConstantBuffer(useSSBO, size, DxsoProgramType::VertexShader, cBufferTarget); + slice = dstBuffer->getSliceHandle(); + } else { + slice = dstBuffer->allocSlice(); + EmitCs([ + cBuffer = dstBuffer, + cSlice = slice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cBuffer, cSlice); + }); + } - auto dst = reinterpret_cast(pData); - - if (constSet.meta.maxConstIndexF) - std::memcpy(dst + Layout.floatOffset(), Src.fConsts, constSet.meta.maxConstIndexF * sizeof(Vector4)); - if (constSet.meta.maxConstIndexI) - std::memcpy(dst + Layout.intOffset(), Src.iConsts, constSet.meta.maxConstIndexI * sizeof(Vector4i)); - if (constSet.meta.maxConstIndexB) - std::memcpy(dst + Layout.bitmaskOffset(), Src.bConsts, Layout.bitmaskSize()); + std::memcpy(slice.mapPtr, src, size); } template - inline void D3D9DeviceEx::UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader) { + inline void D3D9DeviceEx::UploadConstantSet(const SoftwareLayoutType& Src, const ShaderType& Shader) { D3D9ConstantSets& constSet = m_consts[ShaderStage]; if (!constSet.dirty) @@ -4912,12 +4971,12 @@ namespace dxvk { ctx->invalidateBuffer(cBuffer, cSlice); }); - if constexpr (ShaderStage == DxsoProgramType::PixelShader) - 
UploadHardwareConstantSet(slice.mapPtr, Src, Shader); - else if (likely(!CanSWVP())) - UploadHardwareConstantSet(slice.mapPtr, Src, Shader); - else - UploadSoftwareConstantSet(slice.mapPtr, Src, Layout, Shader); + auto* dst = reinterpret_cast(slice.mapPtr); + + if (constSet.meta.maxConstIndexF) + std::memcpy(dst->fConsts, Src.fConsts, constSet.meta.maxConstIndexF * sizeof(Vector4)); + if (constSet.meta.maxConstIndexI) + std::memcpy(dst->iConsts, Src.iConsts, constSet.meta.maxConstIndexI * sizeof(Vector4i)); if (constSet.meta.needsConstantCopies) { Vector4* data = reinterpret_cast(slice.mapPtr); @@ -4932,10 +4991,14 @@ namespace dxvk { template void D3D9DeviceEx::UploadConstants() { - if constexpr (ShaderStage == DxsoProgramTypes::VertexShader) - return UploadConstantSet(m_state.vsConsts, m_vsLayout, m_state.vertexShader); - else - return UploadConstantSet (m_state.psConsts, m_psLayout, m_state.pixelShader); + if constexpr (ShaderStage == DxsoProgramTypes::VertexShader) { + if (CanSWVP()) + return UploadSoftwareConstantSet(m_state.vsConsts, m_vsLayout); + else + return UploadConstantSet(m_state.vsConsts, m_state.vertexShader); + } else { + return UploadConstantSet (m_state.psConsts, m_state.pixelShader); + } } @@ -6321,12 +6384,26 @@ namespace dxvk { pConstantData, Count); + if constexpr (ProgramType == DxsoProgramType::VertexShader) { + if constexpr (ConstantType == D3D9ConstantType::Float) { + m_vsFloatConstsCount = std::max(m_vsFloatConstsCount, StartRegister + Count); + } else if constexpr (ConstantType == D3D9ConstantType::Int) { + m_vsIntConstsCount = std::max(m_vsIntConstsCount, StartRegister + Count); + } else /* if constexpr (ConstantType == D3D9ConstantType::Bool) */ { + m_vsBoolConstsCount = std::max(m_vsBoolConstsCount, StartRegister + Count); + } + } + if constexpr (ConstantType != D3D9ConstantType::Bool) { uint32_t maxCount = ConstantType == D3D9ConstantType::Float ? 
m_consts[ProgramType].meta.maxConstIndexF : m_consts[ProgramType].meta.maxConstIndexI; m_consts[ProgramType].dirty |= StartRegister < maxCount; + } else if constexpr (ProgramType == DxsoProgramType::VertexShader) { + if (unlikely(CanSWVP())) { + m_consts[DxsoProgramType::VertexShader].dirty |= StartRegister < m_consts[ProgramType].meta.maxConstIndexB; + } } UpdateStateConstants( diff --git a/src/d3d9/d3d9_device.h b/src/d3d9/d3d9_device.h index be18542d..bd598a4c 100644 --- a/src/d3d9/d3d9_device.h +++ b/src/d3d9/d3d9_device.h @@ -814,11 +814,13 @@ namespace dxvk { template inline void UploadHardwareConstantSet(void* pData, const SoftwareLayoutType& Src, const ShaderType& Shader); - template - inline void UploadSoftwareConstantSet(void* pData, const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader); + template + inline void UploadSoftwareConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout); + + inline void CopySoftwareConstants(DxsoConstantBuffers cBufferTarget, Rc& dstBuffer, const void* src, uint32_t copySize, bool useSSBO); template - inline void UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader); + inline void UploadConstantSet(const SoftwareLayoutType& Src, const ShaderType& Shader); template void UploadConstants(); @@ -1223,6 +1225,13 @@ namespace dxvk { float m_depthBiasScale = 0.0f; + uint32_t m_robustSSBOAlignment = 1; + uint32_t m_robustUBOAlignment = 1; + + uint32_t m_vsFloatConstsCount = 0; + uint32_t m_vsIntConstsCount = 0; + uint32_t m_vsBoolConstsCount = 0; + D3D9ConstantLayout m_vsLayout; D3D9ConstantLayout m_psLayout; D3D9ConstantSets m_consts[DxsoProgramTypes::Count]; diff --git a/src/dxso/dxso_compiler.cpp b/src/dxso/dxso_compiler.cpp index f7bd312b..d121d1f5 100644 --- a/src/dxso/dxso_compiler.cpp +++ b/src/dxso/dxso_compiler.cpp @@ -264,7 +264,18 @@ namespace dxvk { m_module.enableCapability(spv::CapabilityShader); 
m_module.enableCapability(spv::CapabilityImageQuery); - this->emitDclConstantBuffer(); + if (isSwvp()) { + m_cFloatBuffer = this->emitDclSwvpConstantBuffer(); + m_cIntBuffer = this->emitDclSwvpConstantBuffer(); + m_cBoolBuffer = this->emitDclSwvpConstantBuffer(); + } else { + this->emitDclConstantBuffer(); + } + + m_depthSpecConstant = m_module.specConst32(m_module.defIntType(32, 0), 0); + m_module.decorateSpecId(m_depthSpecConstant, getSpecId(D3D9SpecConstantId::SamplerDepthMode)); + m_module.setDebugName(m_depthSpecConstant, "depthSamplers"); + this->emitDclInputArray(); // Initialize the shader module with capabilities @@ -276,12 +287,8 @@ namespace dxvk { } } - void DxsoCompiler::emitDclConstantBuffer() { - const bool asSsbo = m_moduleInfo.options.vertexConstantBufferAsSSBO && - m_programInfo.type() == DxsoProgramType::VertexShader; - - std::array members = { + std::array members = { // float f[256 or 224 or 8192] m_module.defArrayTypeUnique( getVectorTypeId({ DxsoScalarType::Float32, 4 }), @@ -290,49 +297,27 @@ namespace dxvk { // int i[16 or 2048] m_module.defArrayTypeUnique( getVectorTypeId({ DxsoScalarType::Sint32, 4 }), - m_module.constu32(m_layout->intCount)), - - // uint32_t boolBitmask - // or uvec4 boolBitmask[512] - // Defined later... - 0 + m_module.constu32(m_layout->intCount)) }; // Decorate array strides, this is required. m_module.decorateArrayStride(members[0], 16); m_module.decorateArrayStride(members[1], 16); - const bool swvp = m_layout->bitmaskCount != 1; - - if (swvp) { - // Must be a multiple of 4 otherwise. - members[2] = m_module.defArrayTypeUnique( - getVectorTypeId({ DxsoScalarType::Uint32, 4 }), - m_module.constu32(m_layout->bitmaskCount / 4)); - - m_module.decorateArrayStride(members[2], 16); - } - const uint32_t structType = - m_module.defStructType(swvp ? 3 : 2, members.data()); + m_module.defStructType(members.size(), members.data()); - m_module.decorate(structType, asSsbo + m_module.decorate(structType, false ? 
spv::DecorationBufferBlock : spv::DecorationBlock); m_module.memberDecorateOffset(structType, 0, m_layout->floatOffset()); m_module.memberDecorateOffset(structType, 1, m_layout->intOffset()); - if (swvp) - m_module.memberDecorateOffset(structType, 2, m_layout->bitmaskOffset()); - m_module.setDebugName(structType, "cbuffer_t"); m_module.setDebugMemberName(structType, 0, "f"); m_module.setDebugMemberName(structType, 1, "i"); - if (swvp) - m_module.setDebugMemberName(structType, 2, "b"); - m_cBuffer = m_module.newVar( m_module.defPointerType(structType, spv::StorageClassUniform), spv::StorageClassUniform); @@ -346,14 +331,9 @@ namespace dxvk { m_module.decorateDescriptorSet(m_cBuffer, 0); m_module.decorateBinding(m_cBuffer, bindingId); - if (asSsbo) - m_module.decorate(m_cBuffer, spv::DecorationNonWritable); - DxvkResourceSlot resource; resource.slot = bindingId; - resource.type = asSsbo - ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER - : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + resource.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; resource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM; resource.access = VK_ACCESS_UNIFORM_READ_BIT; m_resourceSlots.push_back(resource); @@ -364,10 +344,97 @@ namespace dxvk { ? 
D3D9SpecConstantId::VertexShaderBools : D3D9SpecConstantId::PixelShaderBools)); m_module.setDebugName(m_boolSpecConstant, "boolConstants"); + } - m_depthSpecConstant = m_module.specConst32(m_module.defIntType(32, 0), 0); - m_module.decorateSpecId(m_depthSpecConstant, getSpecId(D3D9SpecConstantId::SamplerDepthMode)); - m_module.setDebugName(m_depthSpecConstant, "depthSamplers"); + template + uint32_t DxsoCompiler::emitDclSwvpConstantBuffer() { + uint32_t member; + bool asSsbo; + if constexpr (ConstantBufferType == DxsoConstantBufferType::Float) { + asSsbo = m_moduleInfo.options.vertexFloatConstantBufferAsSSBO; + + // float f[8192] + member = m_module.defArrayTypeUnique( + getVectorTypeId({ DxsoScalarType::Float32, 4 }), + m_module.constu32(m_layout->floatCount)); + m_module.decorateArrayStride(member, 16); + } else if constexpr (ConstantBufferType == DxsoConstantBufferType::Int) { + asSsbo = false; + + // int i[2048] + member = m_module.defArrayTypeUnique( + getVectorTypeId({ DxsoScalarType::Sint32, 4 }), + m_module.constu32(m_layout->intCount)); + m_module.decorateArrayStride(member, 16); + } else { + asSsbo = false; + + // uvec4 b[bitmaskCount / 4] (bool bitmasks, 128 bools per uvec4) + member = m_module.defArrayTypeUnique( + getVectorTypeId({ DxsoScalarType::Uint32, 4 }), + m_module.constu32(m_layout->bitmaskCount / 4)); + // Must be a multiple of 4 otherwise. + m_module.decorateArrayStride(member, 16); + } + + const uint32_t structType = + m_module.defStructType(1, &member); + + m_module.decorate(structType, asSsbo + ? 
spv::DecorationBufferBlock + : spv::DecorationBlock); + + m_module.memberDecorateOffset(structType, 0, 0); + + if constexpr (ConstantBufferType == DxsoConstantBufferType::Float) { + m_module.setDebugName(structType, "cbuffer_float_t"); + m_module.setDebugMemberName(structType, 0, "f"); + } else if constexpr (ConstantBufferType == DxsoConstantBufferType::Int) { + m_module.setDebugName(structType, "cbuffer_int_t"); + m_module.setDebugMemberName(structType, 0, "i"); + } else { + m_module.setDebugName(structType, "cbuffer_bool_t"); + m_module.setDebugMemberName(structType, 0, "b"); + } + + uint32_t constantBufferId = m_module.newVar( + m_module.defPointerType(structType, spv::StorageClassUniform), + spv::StorageClassUniform); + + uint32_t bindingId; + if constexpr (ConstantBufferType == DxsoConstantBufferType::Float) { + m_module.setDebugName(constantBufferId, "cF"); + bindingId = computeResourceSlotId( + m_programInfo.type(), DxsoBindingType::ConstantBuffer, + DxsoConstantBuffers::VSFloatConstantBuffer); + } else if constexpr (ConstantBufferType == DxsoConstantBufferType::Int) { + m_module.setDebugName(constantBufferId, "cI"); + bindingId = computeResourceSlotId( + m_programInfo.type(), DxsoBindingType::ConstantBuffer, + DxsoConstantBuffers::VSIntConstantBuffer); + } else { + m_module.setDebugName(constantBufferId, "cB"); + bindingId = computeResourceSlotId( + m_programInfo.type(), DxsoBindingType::ConstantBuffer, + DxsoConstantBuffers::VSBoolConstantBuffer); + } + + m_module.decorateDescriptorSet(constantBufferId, 0); + m_module.decorateBinding(constantBufferId, bindingId); + + if (asSsbo) + m_module.decorate(constantBufferId, spv::DecorationNonWritable); + + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = asSsbo + ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER + : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + resource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + resource.access = asSsbo ? 
VK_ACCESS_MEMORY_READ_BIT : VK_ACCESS_UNIFORM_READ_BIT; + m_resourceSlots.push_back(resource); + + return constantBufferId; } @@ -932,16 +999,33 @@ namespace dxvk { uint32_t relativeIdx = this->emitArrayIndex(reg.id.num, relative); if (reg.id.type != DxsoRegisterType::ConstBool) { - uint32_t structIdx = reg.id.type == DxsoRegisterType::Const - ? m_module.constu32(0) - : m_module.constu32(1); + uint32_t structIdx; + uint32_t cBufferId; + + if (reg.id.type == DxsoRegisterType::Const) { + if (isSwvp()) { + structIdx = m_module.constu32(0); + cBufferId = m_cFloatBuffer; + } else { + structIdx = m_module.constu32(0); + cBufferId = m_cBuffer; + } + } else { + if (isSwvp()) { + structIdx = m_module.constu32(0); + cBufferId = m_cIntBuffer; + } else { + structIdx = m_module.constu32(1); + cBufferId = m_cBuffer; + } + } std::array indices = { structIdx, relativeIdx }; uint32_t typeId = getVectorTypeId(result.type); uint32_t ptrId = m_module.opAccessChain( m_module.defPointerType(typeId, spv::StorageClassUniform), - m_cBuffer, indices.size(), indices.data()); + cBufferId, indices.size(), indices.data()); result.id = m_module.opLoad(typeId, ptrId); @@ -969,14 +1053,14 @@ namespace dxvk { // If not SWVP, spec const this uint32_t bitfield; if (m_layout->bitmaskCount != 1) { - std::array indices = { m_module.constu32(2), m_module.constu32(reg.id.num / 128) }; + std::array indices = { m_module.constu32(0), m_module.constu32(reg.id.num / 128) }; uint32_t indexCount = m_layout->bitmaskCount == 1 ? 1 : 2; uint32_t accessType = m_layout->bitmaskCount == 1 ? 
uintType : uvec4Type; uint32_t ptrId = m_module.opAccessChain( m_module.defPointerType(accessType, spv::StorageClassUniform), - m_cBuffer, indexCount, indices.data()); + m_cBoolBuffer, indexCount, indices.data()); bitfield = m_module.opLoad(accessType, ptrId); } diff --git a/src/dxso/dxso_compiler.h b/src/dxso/dxso_compiler.h index a8257102..a96614d0 100644 --- a/src/dxso/dxso_compiler.h +++ b/src/dxso/dxso_compiler.h @@ -4,6 +4,7 @@ #include "dxso_header.h" #include "dxso_modinfo.h" #include "dxso_isgn.h" +#include "dxso_util.h" #include "../d3d9/d3d9_constant_layout.h" #include "../d3d9/d3d9_shader_permutations.h" @@ -302,7 +303,10 @@ namespace dxvk { //////////////////////////////////// // Ptr to the constant buffer array - uint32_t m_cBuffer; + uint32_t m_cBuffer = 0; + uint32_t m_cFloatBuffer = 0; + uint32_t m_cIntBuffer = 0; + uint32_t m_cBoolBuffer = 0; //////////////////////////////////////// // Constant buffer deffed mappings @@ -373,6 +377,9 @@ namespace dxvk { ////////////////////// // Common shader dcls + template + uint32_t emitDclSwvpConstantBuffer(); + void emitDclConstantBuffer(); void emitDclInputArray(); @@ -678,6 +685,11 @@ namespace dxvk { uint32_t getPointerTypeId( const DxsoRegisterInfo& type); + + bool isSwvp() const { + return m_layout->bitmaskCount != 1; + } + }; } \ No newline at end of file diff --git a/src/dxso/dxso_options.cpp b/src/dxso/dxso_options.cpp index a06f25b3..edcb978d 100644 --- a/src/dxso/dxso_options.cpp +++ b/src/dxso/dxso_options.cpp @@ -38,7 +38,7 @@ namespace dxvk { forceSamplerTypeSpecConstants = options.forceSamplerTypeSpecConstants; - vertexConstantBufferAsSSBO = pDevice->GetVertexConstantLayout().totalSize() > devInfo.core.properties.limits.maxUniformBufferRange; + vertexFloatConstantBufferAsSSBO = pDevice->GetVertexConstantLayout().floatSize() > devInfo.core.properties.limits.maxUniformBufferRange; longMad = options.longMad; diff --git a/src/dxso/dxso_options.h b/src/dxso/dxso_options.h index 24ca4264..2763aa6c 100644 --- 
a/src/dxso/dxso_options.h +++ b/src/dxso/dxso_options.h @@ -43,8 +43,8 @@ namespace dxvk { /// Works around a game bug in Halo CE where it gives cube textures to 2d/volume samplers bool forceSamplerTypeSpecConstants; - /// Should the VS constant buffer be an SSBO (swvp on NV) - bool vertexConstantBufferAsSSBO; + /// Should the SWVP float constant buffer be an SSBO (because of the size on NV) + bool vertexFloatConstantBufferAsSSBO; /// Should we make our Mads a FFma or do it the long way with an FMul and an FAdd? /// This solves some rendering bugs in games that have z-pass shaders which diff --git a/src/dxso/dxso_util.h b/src/dxso/dxso_util.h index fac03d73..1503672f 100644 --- a/src/dxso/dxso_util.h +++ b/src/dxso/dxso_util.h @@ -12,11 +12,20 @@ namespace dxvk { Image, }; + enum class DxsoConstantBufferType : uint32_t { + Float, + Int, + Bool + }; + enum DxsoConstantBuffers : uint32_t { VSConstantBuffer = 0, - VSClipPlanes = 1, - VSFixedFunction = 2, - VSVertexBlendData = 3, + VSFloatConstantBuffer = 0, + VSIntConstantBuffer = 1, + VSBoolConstantBuffer = 2, + VSClipPlanes = 3, + VSFixedFunction = 4, + VSVertexBlendData = 5, VSCount, PSConstantBuffer = 0, @@ -29,7 +38,7 @@ namespace dxvk { DxsoProgramType shaderStage, DxsoBindingType bindingType, uint32_t bindingIndex) { - const uint32_t stageOffset = 8 * uint32_t(shaderStage); + const uint32_t stageOffset = (VSCount + 4) * uint32_t(shaderStage); if (bindingType == DxsoBindingType::ConstantBuffer) return bindingIndex + stageOffset; diff --git a/src/util/util_math.h b/src/util/util_math.h index fdb3762b..5457ef07 100644 --- a/src/util/util_math.h +++ b/src/util/util_math.h @@ -30,5 +30,10 @@ namespace dxvk { return std::fmin( std::fmax(value, min), max); } + + template + inline T divCeil(T dividend, T divisor) { /* integer division rounding up; assumes divisor > 0 and that dividend + divisor - 1 does not overflow T */ + return (dividend + divisor - 1) / divisor; + } } \ No newline at end of file