diff --git a/src/d3d9/d3d9_adapter.cpp b/src/d3d9/d3d9_adapter.cpp index e0f1a185..11ae3d43 100644 --- a/src/d3d9/d3d9_adapter.cpp +++ b/src/d3d9/d3d9_adapter.cpp @@ -271,6 +271,8 @@ namespace dxvk { auto& options = m_parent->GetOptions(); + const VkPhysicalDeviceLimits& limits = m_adapter->deviceProperties().limits; + // TODO: Actually care about what the adapter supports here. // ^ For Intel and older cards most likely here. @@ -531,7 +533,7 @@ namespace dxvk { // Max Vertex Blend Matrix Index pCaps->MaxVertexBlendMatrixIndex = 0; // Max Point Size - pCaps->MaxPointSize = 256.0f; + pCaps->MaxPointSize = limits.pointSizeRange[1]; // Max Primitive Count pCaps->MaxPrimitiveCount = 0x00555555; // Max Vertex Index diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp index e99ad383..5c2e7359 100644 --- a/src/d3d9/d3d9_device.cpp +++ b/src/d3d9/d3d9_device.cpp @@ -1143,6 +1143,9 @@ namespace dxvk { if (unlikely((srcSubresource.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) && m_flags.test(D3D9DeviceFlag::InScene))) return D3DERR_INVALIDCALL; + if (unlikely(Filter != D3DTEXF_NONE && Filter != D3DTEXF_LINEAR && Filter != D3DTEXF_POINT)) + return D3DERR_INVALIDCALL; + VkExtent3D srcExtent = srcImage->mipLevelExtent(srcSubresource.mipLevel); VkExtent3D dstExtent = dstImage->mipLevelExtent(dstSubresource.mipLevel); @@ -1221,8 +1224,43 @@ namespace dxvk { uint32_t(blitInfo.dstOffsets[1].y - blitInfo.dstOffsets[0].y), uint32_t(blitInfo.dstOffsets[1].z - blitInfo.dstOffsets[0].z) }; + bool srcIsDepth = IsDepthFormat(srcFormat); + bool dstIsDepth = IsDepthFormat(dstFormat); + if (unlikely(srcIsDepth || dstIsDepth)) { + if (unlikely(!srcIsDepth || !dstIsDepth)) + return D3DERR_INVALIDCALL; + + if (unlikely(srcTextureInfo->Desc()->Discard || dstTextureInfo->Desc()->Discard)) + return D3DERR_INVALIDCALL; + + if (unlikely(srcCopyExtent.width != srcExtent.width || srcCopyExtent.height != srcExtent.height)) + return D3DERR_INVALIDCALL; + + if (unlikely(m_flags.test(D3D9DeviceFlag::InScene))) + return D3DERR_INVALIDCALL; + } + // Copies would only work if the extents match. (ie. no stretching) bool stretch = srcCopyExtent != dstCopyExtent; + + bool dstHasRTUsage = (dstTextureInfo->Desc()->Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL)) != 0; + if (stretch) { + if (unlikely(pSourceSurface == pDestSurface)) + return D3DERR_INVALIDCALL; + + if (unlikely(dstIsDepth)) + return D3DERR_INVALIDCALL; + + if (unlikely(!dstHasRTUsage)) + return D3DERR_INVALIDCALL; + } else { + bool srcIsSurface = srcTextureInfo->GetType() == D3DRTYPE_SURFACE; + bool dstIsSurface = dstTextureInfo->GetType() == D3DRTYPE_SURFACE; + bool srcHasRTUsage = (srcTextureInfo->Desc()->Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL)) != 0; + if (unlikely(!dstHasRTUsage && (!dstIsSurface || !srcIsSurface || srcHasRTUsage))) + return D3DERR_INVALIDCALL; + } + fastPath &= !stretch; if (!fastPath || needsResolve) { @@ -2908,14 +2946,19 @@ namespace dxvk { auto slice = dst->GetBufferSlice(); slice = slice.subSlice(offset, slice.length() - offset); + D3D9CompactVertexElements elements; + for (const D3DVERTEXELEMENT9& element : decl->GetElements()) { + elements.emplace_back(element); + } + EmitCs([this, - cDecl = ref(decl), - cVertexCount = VertexCount, - cStartIndex = SrcStartIndex, - cInstanceCount = GetInstanceCount(), - cBufferSlice = slice + cVertexElements = std::move(elements), + cVertexCount = VertexCount, + cStartIndex = SrcStartIndex, + cInstanceCount = GetInstanceCount(), + cBufferSlice = slice ](DxvkContext* ctx) mutable { - Rc shader = m_swvpEmulator.GetShaderModule(this, cDecl); + Rc shader = m_swvpEmulator.GetShaderModule(this, std::move(cVertexElements)); auto drawInfo = GenerateDrawInfo(D3DPT_POINTLIST, cVertexCount, cInstanceCount); @@ -7687,6 +7730,8 @@ namespace dxvk { rs[D3DRS_CLIPPLANEENABLE] = 0; m_flags.set(D3D9DeviceFlag::DirtyClipPlanes); + const VkPhysicalDeviceLimits& limits = m_dxvkDevice->adapter()->deviceProperties().limits; + rs[D3DRS_POINTSPRITEENABLE] = FALSE; rs[D3DRS_POINTSCALEENABLE] = FALSE; rs[D3DRS_POINTSCALE_A] = bit::cast(1.0f); @@ -7694,7 +7739,7 @@ namespace dxvk { rs[D3DRS_POINTSCALE_C] = bit::cast(0.0f); rs[D3DRS_POINTSIZE] = bit::cast(1.0f); rs[D3DRS_POINTSIZE_MIN] = bit::cast(1.0f); - rs[D3DRS_POINTSIZE_MAX] = bit::cast(64.0f); + rs[D3DRS_POINTSIZE_MAX] = bit::cast(limits.pointSizeRange[1]); UpdatePushConstant(); UpdatePushConstant(); UpdatePushConstant(); diff --git a/src/d3d9/d3d9_fixed_function.cpp b/src/d3d9/d3d9_fixed_function.cpp index f9a2a1ce..f7f243ec 100644 --- a/src/d3d9/d3d9_fixed_function.cpp +++ b/src/d3d9/d3d9_fixed_function.cpp @@ -1310,6 +1310,8 @@ namespace dxvk { uint32_t midDot = m_module.opDot(m_floatType, normal, mid); midDot = m_module.opFClamp(m_floatType, midDot, m_module.constf32(0.0f), m_module.constf32(1.0f)); uint32_t doSpec = m_module.opFOrdGreaterThan(bool_t, midDot, m_module.constf32(0.0f)); + doSpec = m_module.opLogicalAnd(bool_t, doSpec, m_module.opFOrdGreaterThan(m_floatType, hitDot, m_module.constf32(0.0f))); + uint32_t specularness = m_module.opPow(m_floatType, midDot, m_vs.constants.materialPower); specularness = m_module.opFMul(m_floatType, specularness, atten); specularness = m_module.opSelect(m_floatType, doSpec, specularness, m_module.constf32(0.0f)); diff --git a/src/d3d9/d3d9_state.h b/src/d3d9/d3d9_state.h index 8aeb23e3..2f2af56a 100644 --- a/src/d3d9/d3d9_state.h +++ b/src/d3d9/d3d9_state.h @@ -169,11 +169,11 @@ namespace dxvk { constexpr D3DLIGHT9 DefaultLight = { D3DLIGHT_DIRECTIONAL, // Type - {1.0f, 1.0f, 1.0f, 1.0f}, // Diffuse + {1.0f, 1.0f, 1.0f, 0.0f}, // Diffuse {0.0f, 0.0f, 0.0f, 0.0f}, // Specular {0.0f, 0.0f, 0.0f, 0.0f}, // Ambient {0.0f, 0.0f, 0.0f}, // Position - {0.0f, 0.0f, 0.0f}, // Direction + {0.0f, 0.0f, 1.0f}, // Direction 0.0f, // Range 0.0f, // Falloff 0.0f, 0.0f, 0.0f, // Attenuations [constant, linear, quadratic] diff --git a/src/d3d9/d3d9_stateblock.cpp b/src/d3d9/d3d9_stateblock.cpp index 0d5bacd9..fb2033cd 100644 --- a/src/d3d9/d3d9_stateblock.cpp +++ b/src/d3d9/d3d9_stateblock.cpp @@ -49,7 +49,7 @@ namespace dxvk { if (m_captures.flags.test(D3D9CapturedStateFlag::VertexDecl)) SetVertexDeclaration(m_deviceState->vertexDecl.ptr()); - ApplyOrCapture(); + ApplyOrCapture(); return D3D_OK; } @@ -61,7 +61,7 @@ namespace dxvk { if (m_captures.flags.test(D3D9CapturedStateFlag::VertexDecl) && m_state.vertexDecl != nullptr) m_parent->SetVertexDeclaration(m_state.vertexDecl.ptr()); - ApplyOrCapture(); + ApplyOrCapture(); m_applying = false; return D3D_OK; @@ -122,6 +122,20 @@ namespace dxvk { } + HRESULT D3D9StateBlock::SetStreamSourceWithoutOffset( + UINT StreamNumber, + D3D9VertexBuffer* pStreamData, + UINT Stride) { + m_state.vertexBuffers[StreamNumber].vertexBuffer = pStreamData; + + m_state.vertexBuffers[StreamNumber].stride = Stride; + + m_captures.flags.set(D3D9CapturedStateFlag::VertexBuffers); + m_captures.vertexBuffers.set(StreamNumber, true); + return D3D_OK; + } + + HRESULT D3D9StateBlock::SetStreamSourceFreq(UINT StreamNumber, UINT Setting) { m_state.streamFreq[StreamNumber] = Setting; @@ -572,8 +586,12 @@ namespace dxvk { m_captures.flags.set(D3D9CapturedStateFlag::Material); } - if (Type != D3D9StateBlockType::None) - this->Capture(); + if (Type != D3D9StateBlockType::None) { + if (m_captures.flags.test(D3D9CapturedStateFlag::VertexDecl)) + SetVertexDeclaration(m_deviceState->vertexDecl.ptr()); + + ApplyOrCapture(); + } } } diff --git a/src/d3d9/d3d9_stateblock.h b/src/d3d9/d3d9_stateblock.h index 50826617..64bea6f2 100644 --- a/src/d3d9/d3d9_stateblock.h +++ b/src/d3d9/d3d9_stateblock.h @@ -115,6 +115,11 @@ namespace dxvk { UINT OffsetInBytes, UINT Stride); + HRESULT SetStreamSourceWithoutOffset( + UINT StreamNumber, + D3D9VertexBuffer* pStreamData, + UINT Stride); + HRESULT SetStreamSourceFreq(UINT StreamNumber, UINT Setting); HRESULT SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture); @@ -181,7 +186,7 @@ namespace dxvk { Capture }; - template + template void ApplyOrCapture(Dst* dst, const Src* src) { if (m_captures.flags.test(D3D9CapturedStateFlag::StreamFreq)) { for (uint32_t idx : bit::BitMask(m_captures.streamFreq.dword(0))) @@ -211,11 +216,19 @@ namespace dxvk { if (m_captures.flags.test(D3D9CapturedStateFlag::VertexBuffers)) { for (uint32_t idx : bit::BitMask(m_captures.vertexBuffers.dword(0))) { const auto& vbo = src->vertexBuffers[idx]; - dst->SetStreamSource( - idx, - vbo.vertexBuffer.ptr(), - vbo.offset, - vbo.stride); + if constexpr (!IgnoreStreamOffset) { + dst->SetStreamSource( + idx, + vbo.vertexBuffer.ptr(), + vbo.offset, + vbo.stride); + } else { + // For whatever reason, D3D9 doesn't capture the stream offset + dst->SetStreamSourceWithoutOffset( + idx, + vbo.vertexBuffer.ptr(), + vbo.stride); + } } } @@ -324,12 +337,12 @@ namespace dxvk { } } - template + template void ApplyOrCapture() { if constexpr (Func == D3D9StateFunction::Apply) - ApplyOrCapture(m_parent, &m_state); + ApplyOrCapture(m_parent, &m_state); else if constexpr (Func == D3D9StateFunction::Capture) - ApplyOrCapture(this, m_deviceState); + ApplyOrCapture(this, m_deviceState); } template < diff --git a/src/d3d9/d3d9_swapchain.cpp b/src/d3d9/d3d9_swapchain.cpp index 57724330..f93bfbcb 100644 --- a/src/d3d9/d3d9_swapchain.cpp +++ b/src/d3d9/d3d9_swapchain.cpp @@ -172,6 +172,9 @@ namespace dxvk { m_lastDialog = m_dialog; + if (m_window == nullptr) + return D3D_OK; + #ifdef _WIN32 const bool useGDIFallback = m_partialCopy && !HasFrontBuffer(); if (useGDIFallback) @@ -994,6 +997,9 @@ namespace dxvk { void D3D9SwapChainEx::UpdateWindowCtx() { + if (m_window == nullptr) + return; + if (!m_presenters.count(m_window)) { auto res = m_presenters.emplace( std::piecewise_construct, @@ -1323,10 +1329,10 @@ namespace dxvk { || dstRect.right - dstRect.left != LONG(width) || dstRect.bottom - dstRect.top != LONG(height); - bool recreate = - m_wctx->presenter == nullptr - || m_wctx->presenter->info().imageExtent.width != width - || m_wctx->presenter->info().imageExtent.height != height; + bool recreate = m_wctx != nullptr + && (m_wctx->presenter == nullptr + || m_wctx->presenter->info().imageExtent.width != width + || m_wctx->presenter->info().imageExtent.height != height); m_swapchainExtent = { width, height }; m_dstRect = dstRect; diff --git a/src/d3d9/d3d9_swvp_emu.cpp b/src/d3d9/d3d9_swvp_emu.cpp index 213bd279..98ed7782 100644 --- a/src/d3d9/d3d9_swvp_emu.cpp +++ b/src/d3d9/d3d9_swvp_emu.cpp @@ -9,13 +9,14 @@ namespace dxvk { // Doesn't compare everything, only what we use in SWVP. - size_t D3D9VertexDeclHash::operator () (const D3D9VertexElements& key) const { + size_t D3D9VertexDeclHash::operator () (const D3D9CompactVertexElements& key) const { DxvkHashState hash; std::hash bytehash; std::hash wordhash; - for (auto& element : key) { + for (uint32_t i = 0; i < key.size(); i++) { + const auto& element = key[i]; hash.add(wordhash(element.Stream)); hash.add(wordhash(element.Offset)); hash.add(bytehash(element.Type)); @@ -27,7 +28,7 @@ namespace dxvk { return hash; } - bool D3D9VertexDeclEq::operator () (const D3D9VertexElements& a, const D3D9VertexElements& b) const { + bool D3D9VertexDeclEq::operator () (const D3D9CompactVertexElements& a, const D3D9CompactVertexElements& b) const { if (a.size() != b.size()) return false; @@ -109,7 +110,7 @@ namespace dxvk { m_module.opLabel(m_module.allocateId()); } - void compile(const D3D9VertexDecl* pDecl) { + void compile(const D3D9CompactVertexElements& elements) { uint32_t uint_t = m_module.defIntType(32, false); uint32_t float_t = m_module.defFloatType(32); uint32_t vec4_t = m_module.defVectorType(float_t, 4); @@ -142,15 +143,24 @@ namespace dxvk { m_module.decorateBuiltIn(primitiveIdPtr, spv::BuiltInPrimitiveId); uint32_t primitiveId = m_module.opLoad(uint_t, primitiveIdPtr); - + // The size of any given vertex - uint32_t vertexSize = m_module.constu32(pDecl->GetSize(0) / sizeof(uint32_t)); + uint32_t size = 0; + for (uint32_t i = 0; i < elements.size(); i++) { + const auto& element = elements[i]; + if (element.Stream == 0 && element.Type != D3DDECLTYPE_UNUSED) { + size = std::max(size, element.Offset + GetDecltypeSize(D3DDECLTYPE(element.Type))); + } + } + + uint32_t vertexSize = m_module.constu32(size / sizeof(uint32_t)); //The offset of this vertex from the beginning of the buffer uint32_t thisVertexOffset = m_module.opIMul(uint_t, vertexSize, primitiveId); - for (auto& element : pDecl->GetElements()) { + for (uint32_t i = 0; i < elements.size(); i++) { + const auto& element = elements[i]; // Load the slot associated with this element DxsoSemantic semantic = { DxsoUsage(element.Usage), element.UsageIndex }; @@ -297,9 +307,7 @@ namespace dxvk { }; - Rc D3D9SWVPEmulator::GetShaderModule(D3D9DeviceEx* pDevice, const D3D9VertexDecl* pDecl) { - auto& elements = pDecl->GetElements(); - + Rc D3D9SWVPEmulator::GetShaderModule(D3D9DeviceEx* pDevice, D3D9CompactVertexElements&& elements) { // Use the shader's unique key for the lookup { std::unique_lock lock(m_mutex); @@ -317,7 +325,7 @@ namespace dxvk { // This shader has not been compiled yet, so we have to create a // new module. This takes a while, so we won't lock the structure. D3D9SWVPEmulatorGenerator generator(name); - generator.compile(pDecl); + generator.compile(elements); Rc shader = generator.finalize(); shader->setShaderKey(key); @@ -338,7 +346,8 @@ namespace dxvk { // that object instead and discard the newly created module. { std::unique_lock lock(m_mutex); - auto status = m_modules.insert({ elements, shader }); + std::pair> pair = { std::move(elements), shader }; + auto status = m_modules.insert(std::move(pair)); if (!status.second) return status.first->second; } diff --git a/src/d3d9/d3d9_swvp_emu.h b/src/d3d9/d3d9_swvp_emu.h index 91aae4c2..905a5766 100644 --- a/src/d3d9/d3d9_swvp_emu.h +++ b/src/d3d9/d3d9_swvp_emu.h @@ -11,26 +11,41 @@ namespace dxvk { class D3D9VertexDecl; class D3D9DeviceEx; + struct D3D9CompactVertexElement { + uint16_t Stream : 4; + uint16_t Type : 5; + uint16_t Method : 3; + uint16_t Usage : 4; + uint16_t UsageIndex; + uint16_t Offset; + + D3D9CompactVertexElement(const D3DVERTEXELEMENT9& element) + : Stream(element.Stream), Type(element.Type), Method(element.Method), + Usage(element.Usage), UsageIndex(element.UsageIndex), Offset(element.Offset) {} + }; + + using D3D9CompactVertexElements = small_vector; + struct D3D9VertexDeclHash { - size_t operator () (const D3D9VertexElements& key) const; + size_t operator () (const D3D9CompactVertexElements& key) const; }; struct D3D9VertexDeclEq { - bool operator () (const D3D9VertexElements& a, const D3D9VertexElements& b) const; + bool operator () (const D3D9CompactVertexElements& a, const D3D9CompactVertexElements& b) const; }; class D3D9SWVPEmulator { public: - Rc GetShaderModule(D3D9DeviceEx* pDevice, const D3D9VertexDecl* pDecl); + Rc GetShaderModule(D3D9DeviceEx* pDevice, D3D9CompactVertexElements&& elements); private: dxvk::mutex m_mutex; std::unordered_map< - D3D9VertexElements, Rc, + D3D9CompactVertexElements, Rc, D3D9VertexDeclHash, D3D9VertexDeclEq> m_modules; }; diff --git a/src/util/util_matrix.cpp b/src/util/util_matrix.cpp index 2c5e9314..4b513d11 100644 --- a/src/util/util_matrix.cpp +++ b/src/util/util_matrix.cpp @@ -205,6 +205,10 @@ namespace dxvk { Vector4 dot0 = { m[0] * row0 }; float dot1 = (dot0.x + dot0.y) + (dot0.z + dot0.w); + if (unlikely(std::abs(dot1) <= 0.000001f)) { + return m; + } + return inverse * (1.0f / dot1); } diff --git a/src/util/util_small_vector.h b/src/util/util_small_vector.h index 48fe8bff..d96e58e0 100644 --- a/src/util/util_small_vector.h +++ b/src/util/util_small_vector.h @@ -13,8 +13,37 @@ namespace dxvk { small_vector() { } - small_vector (const small_vector&) = delete; - small_vector& operator = (const small_vector&) = delete; + small_vector(const small_vector& other) { + reserve(other.m_size); + for (size_t i = 0; i < other.m_size; i++) { + *ptr(i) = *other.ptr(i); + } + m_size = other.m_size; + }; + + small_vector& operator = (const small_vector& other) { + for (size_t i = 0; i < m_size; i++) + ptr(i)->~T(); + + reserve(other.m_size); + for (size_t i = 0; i < other.m_size; i++) { + *ptr(i) = *other.ptr(i); + } + m_size = other.m_size; + }; + + small_vector(small_vector&& other) { + if (other.m_capacity == N) { + for (size_t i = 0; i < other.m_size; i++) { + u.m_data[i] = std::move(other.u.m_data[i]); + } + } else { + u.m_ptr = other.u.m_ptr; + other.m_capacity = N; + } + m_size = other.m_size; + other.m_size = 0; + } ~small_vector() { for (size_t i = 0; i < m_size; i++) @@ -23,7 +52,7 @@ namespace dxvk { if (m_capacity > N) delete[] u.m_ptr; } - + size_t size() const { return m_size; } @@ -43,7 +72,7 @@ namespace dxvk { if (m_capacity > N) delete[] u.m_ptr; - + m_capacity = n; u.m_ptr = data; } @@ -56,7 +85,7 @@ namespace dxvk { for (size_t i = n; i < m_size; i++) ptr(i)->~T(); - + for (size_t i = m_size; i < n; i++) new (ptr(i)) T();