// NOTE(review): the first #include below has no header name in this copy of
// the file, and template parameter lists / explicit template arguments (after
// `template`, `static_cast`, `dynamic_cast`, `Rc`, `Com`, ...) are missing
// throughout -- presumably stripped during extraction. Code tokens are kept
// exactly as found; confirm against the original source before building.
#include
#include "d3d11_context.h"
#include "d3d11_context_def.h"
#include "d3d11_context_imm.h"

namespace dxvk {

  // Constructs the common context: forwards pParent to the device-child base
  // and initializes the members from the arguments. m_csChunk is initialized
  // with AllocCsChunk() and m_cmdData starts out as nullptr.
  template
  D3D11CommonContext::D3D11CommonContext(
    D3D11Device* pParent,
    const Rc& Device,
    UINT ContextFlags,
    DxvkCsChunkFlags CsFlags)
  : D3D11DeviceChild(pParent),
    m_contextExt(GetTypedContext()),
    m_annotation(GetTypedContext(), Device),
    m_device (Device),
    m_flags (ContextFlags),
    m_staging (Device, StagingBufferSize),
    m_csFlags (CsFlags),
    m_csChunk (AllocCsChunk()),
    m_cmdData (nullptr) {

  }


  // Destructor; no explicit work is done here.
  template
  D3D11CommonContext::~D3D11CommonContext() {

  }


  // COM interface lookup.
  // Returns E_POINTER if ppvObject is null. Otherwise *ppvObject is cleared
  // first, then set to a referenced pointer to this object, m_contextExt or
  // m_annotation depending on riid (S_OK). Unknown IIDs are logged as
  // warnings and yield E_NOINTERFACE.
  template
  HRESULT STDMETHODCALLTYPE D3D11CommonContext::QueryInterface(REFIID riid, void** ppvObject) {
    if (ppvObject == nullptr)
      return E_POINTER;

    *ppvObject = nullptr;

    // The device context interfaces are served by this object itself
    if (riid == __uuidof(IUnknown)
     || riid == __uuidof(ID3D11DeviceChild)
     || riid == __uuidof(ID3D11DeviceContext)
     || riid == __uuidof(ID3D11DeviceContext1)
     || riid == __uuidof(ID3D11DeviceContext2)
     || riid == __uuidof(ID3D11DeviceContext3)
     || riid == __uuidof(ID3D11DeviceContext4)) {
      *ppvObject = ref(this);
      return S_OK;
    }

    // Vulkan extension interfaces are served by the m_contextExt member
    if (riid == __uuidof(ID3D11VkExtContext)
     || riid == __uuidof(ID3D11VkExtContext1)) {
      *ppvObject = ref(&m_contextExt);
      return S_OK;
    }

    // Annotation interfaces are served by the m_annotation member
    if (riid == __uuidof(ID3DUserDefinedAnnotation)
     || riid == __uuidof(IDXVKUserDefinedAnnotation)) {
      *ppvObject = ref(&m_annotation);
      return S_OK;
    }

    Logger::warn("D3D11DeviceContext::QueryInterface: Unknown interface query");
    Logger::warn(str::format(riid));
    return E_NOINTERFACE;
  }


  // Returns D3D11_DEVICE_CONTEXT_DEFERRED if IsDeferred is set,
  // D3D11_DEVICE_CONTEXT_IMMEDIATE otherwise.
  template
  D3D11_DEVICE_CONTEXT_TYPE STDMETHODCALLTYPE D3D11CommonContext::GetType() {
    return IsDeferred
      ? D3D11_DEVICE_CONTEXT_DEFERRED
      : D3D11_DEVICE_CONTEXT_IMMEDIATE;
  }


  // Returns the context flags that were passed to the constructor.
  template
  UINT STDMETHODCALLTYPE D3D11CommonContext::GetContextFlags() {
    return m_flags;
  }


  // Takes the context lock, then resets the command list
  // state followed by the context state.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::ClearState() {
    D3D10DeviceLock lock = LockContext();

    ResetCommandListState();
    ResetContextState();
  }


  // Discards the contents of a resource. Null resources are ignored.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DiscardResource(ID3D11Resource* pResource) {
    D3D10DeviceLock lock = LockContext();

    if (!pResource)
      return;

    // Buffers are discarded as a whole. For any other resource
    // type, each subresource of the texture is discarded in turn.
    D3D11_RESOURCE_DIMENSION resType = D3D11_RESOURCE_DIMENSION_UNKNOWN;
    pResource->GetType(&resType);

    if (resType == D3D11_RESOURCE_DIMENSION_BUFFER) {
      DiscardBuffer(pResource);
    } else {
      auto texture = GetCommonTexture(pResource);

      for (uint32_t i = 0; i < texture->CountSubresources(); i++)
        DiscardTexture(pResource, i);
    }
  }


  // Discards an entire view; forwards to DiscardView1 without rectangles.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DiscardView(ID3D11View* pResourceView) {
    DiscardView1(pResourceView, nullptr, 0);
  }


  // Discards the subresources covered by a depth-stencil, render target or
  // unordered access view. The call is a no-op if the view is null, if a
  // non-empty rectangle list is passed, or if no image view can be obtained.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DiscardView1(
    ID3D11View* pResourceView,
    const D3D11_RECT* pRects,
    UINT NumRects) {
    D3D10DeviceLock lock = LockContext();

    // We don't support discarding individual rectangles
    if (!pResourceView || (NumRects && pRects))
      return;

    // ID3D11View has no methods to query the exact type of
    // the view, so we'll have to check each possible class
    auto dsv = dynamic_cast(pResourceView);
    auto rtv = dynamic_cast(pResourceView);
    auto uav = dynamic_cast(pResourceView);

    Rc view;
    if (dsv) view = dsv->GetImageView();
    if (rtv) view = rtv->GetImageView();
    if (uav) view = uav->GetImageView();

    if (view == nullptr)
      return;

    // Get information about underlying resource
    Com resource;
    pResourceView->GetResource(&resource);

    uint32_t mipCount = GetCommonTexture(resource.ptr())->Desc()->MipLevels;

    // Discard mip levels one by one
    VkImageSubresourceRange sr = view->subresources();

    for (uint32_t layer = 0; layer < sr.layerCount; layer++) {
      for (uint32_t mip = 0; mip < sr.levelCount; mip++) {
        DiscardTexture(resource.ptr(), D3D11CalcSubresource(
          sr.baseMipLevel + mip, sr.baseArrayLayer + layer, mipCount));
      }
    }

    // Since we don't handle SRVs here, we can assume that the
    // view covers all aspects of the underlying resource.
    EmitCs([cView = view] (DxvkContext* ctx) {
      ctx->discardImageView(cView, cView->formatInfo()->aspectMask);
    });
  }


  // Forwards to CopySubresourceRegion1 with CopyFlags set to 0.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::CopySubresourceRegion(
    ID3D11Resource* pDstResource,
    UINT DstSubresource,
    UINT DstX,
    UINT DstY,
    UINT DstZ,
    ID3D11Resource* pSrcResource,
    UINT SrcSubresource,
    const D3D11_BOX* pSrcBox) {
    CopySubresourceRegion1(
      pDstResource, DstSubresource, DstX, DstY, DstZ,
      pSrcResource, SrcSubresource, pSrcBox, 0);
  }


  // Copies a region between two buffers or between two textures.
  // Null resources and empty or inverted source boxes are ignored, and a
  // buffer/texture mix matches neither branch, so nothing is copied.
  // CopyFlags is not read anywhere in this function.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::CopySubresourceRegion1(
    ID3D11Resource* pDstResource,
    UINT DstSubresource,
    UINT DstX,
    UINT DstY,
    UINT DstZ,
    ID3D11Resource* pSrcResource,
    UINT SrcSubresource,
    const D3D11_BOX* pSrcBox,
    UINT CopyFlags) {
    D3D10DeviceLock lock = LockContext();

    if (!pDstResource || !pSrcResource)
      return;

    if (pSrcBox
     && (pSrcBox->left >= pSrcBox->right
      || pSrcBox->top >= pSrcBox->bottom
      || pSrcBox->front >= pSrcBox->back))
      return;

    D3D11_RESOURCE_DIMENSION dstResourceDim = D3D11_RESOURCE_DIMENSION_UNKNOWN;
    D3D11_RESOURCE_DIMENSION srcResourceDim = D3D11_RESOURCE_DIMENSION_UNKNOWN;

    pDstResource->GetType(&dstResourceDim);
    pSrcResource->GetType(&srcResourceDim);

    if (dstResourceDim == D3D11_RESOURCE_DIMENSION_BUFFER && srcResourceDim == D3D11_RESOURCE_DIMENSION_BUFFER) {
      auto dstBuffer = static_cast(pDstResource);
      auto srcBuffer = static_cast(pSrcResource);

      // Only the X coordinate / left-right extent is used for buffers.
      // Without a source box, byteCount stays at -1 -- presumably CopyBuffer
      // treats that as "up to the end of the buffer"; confirm in CopyBuffer.
      VkDeviceSize dstOffset = DstX;
      VkDeviceSize srcOffset = 0;
      VkDeviceSize byteCount = -1;

      if (pSrcBox) {
        srcOffset = pSrcBox->left;
        byteCount = pSrcBox->right - pSrcBox->left;
      }

      CopyBuffer(dstBuffer, dstOffset, srcBuffer, srcOffset, byteCount);
    } else if (dstResourceDim != D3D11_RESOURCE_DIMENSION_BUFFER && srcResourceDim != D3D11_RESOURCE_DIMENSION_BUFFER) {
      auto dstTexture = GetCommonTexture(pDstResource);
      auto srcTexture = GetCommonTexture(pSrcResource);

      // Ignore out-of-range subresource indices
      if (DstSubresource >= dstTexture->CountSubresources()
       || SrcSubresource >= srcTexture->CountSubresources())
        return;

      auto dstFormatInfo = lookupFormatInfo(dstTexture->GetPackedFormat());
      auto srcFormatInfo = lookupFormatInfo(srcTexture->GetPackedFormat());

      auto dstLayers = vk::makeSubresourceLayers(dstTexture->GetSubresourceFromIndex(dstFormatInfo->aspectMask, DstSubresource));
      auto srcLayers = vk::makeSubresourceLayers(srcTexture->GetSubresourceFromIndex(srcFormatInfo->aspectMask, SrcSubresource));

      VkOffset3D srcOffset = { 0, 0, 0 };
      VkOffset3D dstOffset = { int32_t(DstX), int32_t(DstY), int32_t(DstZ) };

      // Default to the full source mip level unless a box is given
      VkExtent3D srcExtent = srcTexture->MipLevelExtent(srcLayers.mipLevel);

      if (pSrcBox) {
        srcOffset.x = pSrcBox->left;
        srcOffset.y = pSrcBox->top;
        srcOffset.z = pSrcBox->front;

        srcExtent.width = pSrcBox->right - pSrcBox->left;
        srcExtent.height = pSrcBox->bottom - pSrcBox->top;
        srcExtent.depth = pSrcBox->back - pSrcBox->front;
      }

      CopyImage(
        dstTexture, &dstLayers, dstOffset,
        srcTexture, &srcLayers, srcOffset,
        srcExtent);
    }
  }


  // Copies an entire resource. The call is ignored if either resource is
  // null, if both are the same resource, if their dimensions differ, or if
  // the buffer sizes / texture subresource layouts do not match.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::CopyResource(
    ID3D11Resource* pDstResource,
    ID3D11Resource* pSrcResource) {
    D3D10DeviceLock lock = LockContext();

    if (!pDstResource || !pSrcResource || (pDstResource == pSrcResource))
      return;

    D3D11_RESOURCE_DIMENSION dstResourceDim = D3D11_RESOURCE_DIMENSION_UNKNOWN;
    D3D11_RESOURCE_DIMENSION srcResourceDim = D3D11_RESOURCE_DIMENSION_UNKNOWN;

    pDstResource->GetType(&dstResourceDim);
    pSrcResource->GetType(&srcResourceDim);

    if (dstResourceDim != srcResourceDim)
      return;

    if (dstResourceDim == D3D11_RESOURCE_DIMENSION_BUFFER) {
      auto dstBuffer = static_cast(pDstResource);
      auto srcBuffer = static_cast(pSrcResource);

      // Both buffers must have the same size
      if (dstBuffer->Desc()->ByteWidth != srcBuffer->Desc()->ByteWidth)
        return;

      CopyBuffer(dstBuffer, 0, srcBuffer, 0, -1);
    } else {
      auto dstTexture = GetCommonTexture(pDstResource);
      auto srcTexture = GetCommonTexture(pSrcResource);

      auto dstDesc = dstTexture->Desc();
      auto srcDesc = srcTexture->Desc();

      // The subresource count must match as well
      if (dstDesc->ArraySize != srcDesc->ArraySize
       || dstDesc->MipLevels != srcDesc->MipLevels)
        return;

      auto dstFormatInfo = lookupFormatInfo(dstTexture->GetPackedFormat());
      auto srcFormatInfo = lookupFormatInfo(srcTexture->GetPackedFormat());

      // One copy per mip level, each covering all array layers
      for (uint32_t i = 0; i < dstDesc->MipLevels; i++) {
        VkImageSubresourceLayers dstLayers = { dstFormatInfo->aspectMask, i, 0, dstDesc->ArraySize };
        VkImageSubresourceLayers srcLayers = { srcFormatInfo->aspectMask, i, 0, srcDesc->ArraySize };

        CopyImage(
          dstTexture, &dstLayers, VkOffset3D(),
          srcTexture, &srcLayers, VkOffset3D(),
          srcTexture->MipLevelExtent(i));
      }
    }
  }


  // Copies the counter of an unordered access view (sizeof(uint32_t) bytes)
  // into pDstBuffer at DstAlignedByteOffset. Ignored if either argument is
  // null or if the view has no defined counter slice.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::CopyStructureCount(
    ID3D11Buffer* pDstBuffer,
    UINT DstAlignedByteOffset,
    ID3D11UnorderedAccessView* pSrcView) {
    D3D10DeviceLock lock = LockContext();

    auto buf = static_cast(pDstBuffer);
    auto uav = static_cast(pSrcView);

    if (!buf || !uav)
      return;

    auto counterSlice = uav->GetCounterSlice();
    if (!counterSlice.defined())
      return;

    EmitCs([
      cDstSlice = buf->GetBufferSlice(DstAlignedByteOffset),
      cSrcSlice = std::move(counterSlice)
    ] (DxvkContext* ctx) {
      ctx->copyBuffer(
        cDstSlice.buffer(),
        cDstSlice.offset(),
        cSrcSlice.buffer(),
        cSrcSlice.offset(),
        sizeof(uint32_t));
    });

    // Let the typed context track the write if the buffer asks for it
    if (buf->HasSequenceNumber())
      GetTypedContext()->TrackBufferSequenceNumber(buf);
  }


  // Clears a render target view to ColorRGBA, converted according to the
  // view's format info. Null views are ignored.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::ClearRenderTargetView(
    ID3D11RenderTargetView* pRenderTargetView,
    const FLOAT ColorRGBA[4]) {
    D3D10DeviceLock lock = LockContext();

    auto rtv = static_cast(pRenderTargetView);

    if (!rtv)
      return;

    auto view = rtv->GetImageView();
    auto color = ConvertColorValue(ColorRGBA, view->formatInfo());

    EmitCs([
      cClearValue = color,
      cImageView = std::move(view)
    ] (DxvkContext* ctx) {
      ctx->clearRenderTarget(
        cImageView,
        VK_IMAGE_ASPECT_COLOR_BIT,
        cClearValue);
    });
  }


  // The declaration started here continues on the following source line.
  template
  void STDMETHODCALLTYPE 
D3D11CommonContext::ClearUnorderedAccessViewUint(
    ID3D11UnorderedAccessView* pUnorderedAccessView,
    const UINT Values[4]) {
    // Clears an unordered access view with raw integer values. Null views
    // are ignored. Depending on the resource type and formats involved this
    // uses a plain buffer clear, a (possibly re-typed) buffer or image view
    // clear, or a temporary buffer that is cleared and copied to the image.
    //
    // NOTE(review): explicit template arguments (static_cast, dynamic_cast,
    // Rc, Com, EmitCsCmd, ...) are missing throughout this copy of the file,
    // presumably stripped during extraction; tokens are kept as found.
    D3D10DeviceLock lock = LockContext();

    auto uav = static_cast(pUnorderedAccessView);

    if (!uav)
      return;

    // Gather UAV format info. We'll use this to determine
    // whether we need to create a temporary view or not.
    D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc;
    uav->GetDesc(&uavDesc);

    VkFormat uavFormat = m_parent->LookupFormat(uavDesc.Format, DXGI_VK_FORMAT_MODE_ANY).Format;
    VkFormat rawFormat = m_parent->LookupFormat(uavDesc.Format, DXGI_VK_FORMAT_MODE_RAW).Format;

    if (uavFormat != rawFormat && rawFormat == VK_FORMAT_UNDEFINED) {
      Logger::err(str::format("D3D11: ClearUnorderedAccessViewUint: No raw format found for ", uavFormat));
      return;
    }

    VkClearValue clearValue;

    // R11G11B10 is a special case since there's no corresponding
    // integer format with the same bit layout. Use R32 instead.
    // The three components are packed into 11, 11 and 10 bits.
    if (uavFormat == VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
      clearValue.color.uint32[0] = ((Values[0] & 0x7FF) << 0)
                                 | ((Values[1] & 0x7FF) << 11)
                                 | ((Values[2] & 0x3FF) << 22);
      clearValue.color.uint32[1] = 0;
      clearValue.color.uint32[2] = 0;
      clearValue.color.uint32[3] = 0;
    } else {
      clearValue.color.uint32[0] = Values[0];
      clearValue.color.uint32[1] = Values[1];
      clearValue.color.uint32[2] = Values[2];
      clearValue.color.uint32[3] = Values[3];
    }

    if (uav->GetResourceType() == D3D11_RESOURCE_DIMENSION_BUFFER) {
      // In case of raw and structured buffers as well as typed
      // buffers that can be used for atomic operations, we can
      // use the fast Vulkan buffer clear function.
      Rc bufferView = uav->GetBufferView();

      if (bufferView->info().format == VK_FORMAT_R32_UINT
       || bufferView->info().format == VK_FORMAT_R32_SINT
       || bufferView->info().format == VK_FORMAT_R32_SFLOAT
       || bufferView->info().format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
        // Only the first component is used for the 32-bit fill value
        EmitCs([
          cClearValue = clearValue.color.uint32[0],
          cDstSlice = bufferView->slice()
        ] (DxvkContext* ctx) {
          ctx->clearBuffer(
            cDstSlice.buffer(),
            cDstSlice.offset(),
            cDstSlice.length(),
            cClearValue);
        });
      } else {
        // Create a view with an integer format if necessary
        if (uavFormat != rawFormat) {
          DxvkBufferViewCreateInfo info = bufferView->info();
          info.format = rawFormat;

          bufferView = m_device->createBufferView(
            bufferView->buffer(), info);
        }

        EmitCs([
          cClearValue = clearValue,
          cDstView = bufferView
        ] (DxvkContext* ctx) {
          ctx->clearBufferView(
            cDstView, 0,
            cDstView->elementCount(),
            cClearValue.color);
        });
      }
    } else {
      Rc imageView = uav->GetImageView();

      // If the clear value is zero, we can use the original view regardless of
      // the format since the bit pattern will not change in any supported format.
      bool isZeroClearValue = !(clearValue.color.uint32[0] | clearValue.color.uint32[1]
                              | clearValue.color.uint32[2] | clearValue.color.uint32[3]);

      // Check if we can create an image view with the given raw format. If not,
      // we'll have to use a fallback using a texel buffer view and buffer copies.
      bool isViewCompatible = uavFormat == rawFormat;

      if (!isViewCompatible && (imageView->imageInfo().flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT)) {
        // An empty view format list is treated as compatible; otherwise
        // the raw format has to appear in the image's view format list.
        uint32_t formatCount = imageView->imageInfo().viewFormatCount;
        isViewCompatible = formatCount == 0;

        for (uint32_t i = 0; i < formatCount && !isViewCompatible; i++)
          isViewCompatible = imageView->imageInfo().viewFormats[i] == rawFormat;
      }

      if (isViewCompatible || isZeroClearValue) {
        // Create a view with an integer format if necessary
        if (uavFormat != rawFormat && !isZeroClearValue) {
          DxvkImageViewCreateInfo info = imageView->info();
          info.format = rawFormat;

          imageView = m_device->createImageView(imageView->image(), info);
        }

        EmitCs([
          cClearValue = clearValue,
          cDstView = imageView
        ] (DxvkContext* ctx) {
          ctx->clearImageView(cDstView,
            VkOffset3D { 0, 0, 0 },
            cDstView->mipLevelExtent(0),
            VK_IMAGE_ASPECT_COLOR_BIT,
            cClearValue);
        });
      } else {
        // Fallback: clear a temporary buffer through a texel buffer view
        // in the raw format, then copy that buffer into the image.
        DxvkBufferCreateInfo bufferInfo;
        bufferInfo.size = imageView->formatInfo()->elementSize
                        * imageView->info().numLayers
                        * util::flattenImageExtent(imageView->mipLevelExtent(0));
        bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT
                         | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
        bufferInfo.stages = VK_PIPELINE_STAGE_TRANSFER_BIT
                          | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
        bufferInfo.access = VK_ACCESS_TRANSFER_READ_BIT
                          | VK_ACCESS_SHADER_WRITE_BIT;

        Rc buffer = m_device->createBuffer(bufferInfo, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);

        DxvkBufferViewCreateInfo bufferViewInfo;
        bufferViewInfo.format = rawFormat;
        bufferViewInfo.rangeOffset = 0;
        bufferViewInfo.rangeLength = bufferInfo.size;

        Rc bufferView = m_device->createBufferView(buffer, bufferViewInfo);

        EmitCs([
          cDstView = std::move(imageView),
          cSrcView = std::move(bufferView),
          cClearValue = clearValue.color
        ] (DxvkContext* ctx) {
          ctx->clearBufferView(
            cSrcView, 0,
            cSrcView->elementCount(),
            cClearValue);

          ctx->copyBufferToImage(cDstView->image(),
            vk::pickSubresourceLayers(cDstView->subresources(), 0),
            VkOffset3D { 0, 0, 0 },
            cDstView->mipLevelExtent(0),
            cSrcView->buffer(), 0, 0, 0);
        });
      }
    }
  }


  // Clears an unordered access view with floating point values.
  // Ignored for null views, for views without format info, and for
  // formats flagged as SampledSInt or SampledUInt.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::ClearUnorderedAccessViewFloat(
    ID3D11UnorderedAccessView* pUnorderedAccessView,
    const FLOAT Values[4]) {
    D3D10DeviceLock lock = LockContext();

    auto uav = static_cast(pUnorderedAccessView);

    if (!uav)
      return;

    auto imgView = uav->GetImageView();
    auto bufView = uav->GetBufferView();

    // Pick the format info from whichever view is present
    const DxvkFormatInfo* info = nullptr;
    if (imgView != nullptr) info = imgView->formatInfo();
    if (bufView != nullptr) info = bufView->formatInfo();

    if (!info || info->flags.any(DxvkFormatFlag::SampledSInt, DxvkFormatFlag::SampledUInt))
      return;

    VkClearValue clearValue;
    clearValue.color.float32[0] = Values[0];
    clearValue.color.float32[1] = Values[1];
    clearValue.color.float32[2] = Values[2];
    clearValue.color.float32[3] = Values[3];

    if (uav->GetResourceType() == D3D11_RESOURCE_DIMENSION_BUFFER) {
      EmitCs([
        cClearValue = clearValue,
        cDstView = std::move(bufView)
      ] (DxvkContext* ctx) {
        ctx->clearBufferView(
          cDstView, 0,
          cDstView->elementCount(),
          cClearValue.color);
      });
    } else {
      EmitCs([
        cClearValue = clearValue,
        cDstView = std::move(imgView)
      ] (DxvkContext* ctx) {
        ctx->clearImageView(cDstView,
          VkOffset3D { 0, 0, 0 },
          cDstView->mipLevelExtent(0),
          VK_IMAGE_ASPECT_COLOR_BIT,
          cClearValue);
      });
    }
  }


  // Clears the depth and/or stencil aspect of a depth-stencil view.
  // Ignored for null views and when no requested aspect is writable.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::ClearDepthStencilView(
    ID3D11DepthStencilView* pDepthStencilView,
    UINT ClearFlags,
    FLOAT Depth,
    UINT8 Stencil) {
    D3D10DeviceLock lock = LockContext();

    auto dsv = static_cast(pDepthStencilView);

    if (!dsv)
      return;

    // Figure out which aspects to clear based on
    // the image view properties and clear flags.
    VkImageAspectFlags aspectMask = 0;

    if (ClearFlags & D3D11_CLEAR_DEPTH)
      aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;

    if (ClearFlags & D3D11_CLEAR_STENCIL)
      aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;

    aspectMask &= dsv->GetWritableAspectMask();

    if (!aspectMask)
      return;

    VkClearValue clearValue;
    clearValue.depthStencil.depth = Depth;
    clearValue.depthStencil.stencil = Stencil;

    EmitCs([
      cClearValue = clearValue,
      cAspectMask = aspectMask,
      cImageView = dsv->GetImageView()
    ] (DxvkContext* ctx) {
      ctx->clearRenderTarget(
        cImageView,
        cAspectMask,
        cClearValue);
    });
  }


  // Clears a view, or a set of rectangles within it, to the given color.
  // Ignored if NumRects is non-zero but pRect is null, if the view resolves
  // to a 3D image view, or if no view format can be determined.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::ClearView(
    ID3D11View* pView,
    const FLOAT Color[4],
    const D3D11_RECT* pRect,
    UINT NumRects) {
    D3D10DeviceLock lock = LockContext();

    if (NumRects && !pRect)
      return;

    // ID3D11View has no methods to query the exact type of
    // the view, so we'll have to check each possible class
    auto dsv = dynamic_cast(pView);
    auto rtv = dynamic_cast(pView);
    auto uav = dynamic_cast(pView);
    auto vov = dynamic_cast(pView);

    // Retrieve underlying resource view
    Rc bufView;
    Rc imgView;

    if (dsv != nullptr)
      imgView = dsv->GetImageView();

    if (rtv != nullptr)
      imgView = rtv->GetImageView();

    if (uav != nullptr) {
      bufView = uav->GetBufferView();
      imgView = uav->GetImageView();
    }

    if (vov != nullptr)
      imgView = vov->GetView();

    // 3D views are unsupported
    if (imgView != nullptr
     && imgView->info().type == VK_IMAGE_VIEW_TYPE_3D)
      return;

    // Query the view format. We'll have to convert
    // the clear color based on the format's data type.
    VkFormat format = VK_FORMAT_UNDEFINED;

    if (bufView != nullptr)
      format = bufView->info().format;

    if (imgView != nullptr)
      format = imgView->info().format;

    if (format == VK_FORMAT_UNDEFINED)
      return;

    // We'll need the format info to determine the buffer
    // element size, and we also need it for depth images.
    const DxvkFormatInfo* formatInfo = lookupFormatInfo(format);

    // Convert the clear color format. ClearView takes
    // the clear value for integer formats as a set of
    // integral floats, so we'll have to convert.
    VkClearValue clearValue = ConvertColorValue(Color, formatInfo);
    VkImageAspectFlags clearAspect = formatInfo->aspectMask & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT);

    // Clear all the rectangles that are specified. The loop always runs
    // at least once so that a call without rectangles clears everything.
    // NOTE(review): with NumRects == 0 and a non-null pRect, pRect[0] is
    // still read below -- confirm that callers never pass that combination.
    for (uint32_t i = 0; i < NumRects || i < 1; i++) {
      if (pRect) {
        // Skip empty or inverted rectangles
        if (pRect[i].left >= pRect[i].right
         || pRect[i].top >= pRect[i].bottom)
          continue;
      }

      if (bufView != nullptr) {
        // Offset and length are passed on in elements; for buffers only
        // the horizontal extent of the rectangle is used.
        VkDeviceSize offset = 0;
        VkDeviceSize length = bufView->info().rangeLength / formatInfo->elementSize;

        if (pRect) {
          offset = pRect[i].left;
          length = pRect[i].right - pRect[i].left;
        }

        EmitCs([
          cBufferView = bufView,
          cRangeOffset = offset,
          cRangeLength = length,
          cClearValue = clearValue
        ] (DxvkContext* ctx) {
          ctx->clearBufferView(
            cBufferView,
            cRangeOffset,
            cRangeLength,
            cClearValue.color);
        });
      }

      if (imgView != nullptr) {
        VkOffset3D offset = { 0, 0, 0 };
        VkExtent3D extent = imgView->mipLevelExtent(0);

        if (pRect) {
          offset = { pRect[i].left, pRect[i].top, 0 };
          extent = {
            uint32_t(pRect[i].right - pRect[i].left),
            uint32_t(pRect[i].bottom - pRect[i].top), 1 };
        }

        EmitCs([
          cImageView = imgView,
          cAreaOffset = offset,
          cAreaExtent = extent,
          cClearAspect = clearAspect,
          cClearValue = clearValue
        ] (DxvkContext* ctx) {
          const VkImageUsageFlags rtUsage =
            VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
            VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;

          // Use a render target clear when the view can be used as an
          // attachment and the whole mip level is cleared; otherwise
          // clear the given area through the image view.
          bool isFullSize = cImageView->mipLevelExtent(0) == cAreaExtent;

          if ((cImageView->info().usage & rtUsage) && isFullSize) {
            ctx->clearRenderTarget(
              cImageView,
              cClearAspect,
              cClearValue);
          } else {
            ctx->clearImageView(
              cImageView,
              cAreaOffset,
              cAreaExtent,
              cClearAspect,
              cClearValue);
          }
        });
      }
    }
  }


  // Generates mip maps for the image behind a shader resource view, using
  // linear filtering. Ignored for null views, buffer views, and resources
  // created without D3D11_RESOURCE_MISC_GENERATE_MIPS.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::GenerateMips(ID3D11ShaderResourceView* pShaderResourceView) {
    D3D10DeviceLock lock = LockContext();

    auto view = static_cast(pShaderResourceView);

    if (!view || view->GetResourceType() == D3D11_RESOURCE_DIMENSION_BUFFER)
      return;

    D3D11_COMMON_RESOURCE_DESC resourceDesc = view->GetResourceDesc();

    if (!(resourceDesc.MiscFlags & D3D11_RESOURCE_MISC_GENERATE_MIPS))
      return;

    EmitCs([cDstImageView = view->GetImageView()]
    (DxvkContext* ctx) {
      ctx->generateMipmaps(cDstImageView, VK_FILTER_LINEAR);
    });
  }


  // Resolves (or, for single-sampled sources, copies) one subresource of a
  // 2D texture into another. Ignored if either resource is null, if source
  // and destination are the same subresource, if either is not a 2D
  // texture, if the destination is multisampled, or if a subresource index
  // is out of range.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::ResolveSubresource(
    ID3D11Resource* pDstResource,
    UINT DstSubresource,
    ID3D11Resource* pSrcResource,
    UINT SrcSubresource,
    DXGI_FORMAT Format) {
    D3D10DeviceLock lock = LockContext();

    bool isSameSubresource = pDstResource == pSrcResource
                          && DstSubresource == SrcSubresource;

    if (!pDstResource || !pSrcResource || isSameSubresource)
      return;

    // NOTE(review): unlike the copy functions, these are not initialized
    // here and rely on GetType always writing its output -- confirm.
    D3D11_RESOURCE_DIMENSION dstResourceType;
    D3D11_RESOURCE_DIMENSION srcResourceType;

    pDstResource->GetType(&dstResourceType);
    pSrcResource->GetType(&srcResourceType);

    if (dstResourceType != D3D11_RESOURCE_DIMENSION_TEXTURE2D
     || srcResourceType != D3D11_RESOURCE_DIMENSION_TEXTURE2D)
      return;

    auto dstTexture = static_cast(pDstResource);
    auto srcTexture = static_cast(pSrcResource);

    D3D11_TEXTURE2D_DESC dstDesc;
    D3D11_TEXTURE2D_DESC srcDesc;

    dstTexture->GetDesc(&dstDesc);
    srcTexture->GetDesc(&srcDesc);

    // The destination must be single-sampled
    if (dstDesc.SampleDesc.Count != 1)
      return;

    D3D11CommonTexture* dstTextureInfo = GetCommonTexture(pDstResource);
    D3D11CommonTexture* srcTextureInfo = GetCommonTexture(pSrcResource);

    const DXGI_VK_FORMAT_INFO dstFormatInfo = m_parent->LookupFormat(dstDesc.Format, DXGI_VK_FORMAT_MODE_ANY);
    const DXGI_VK_FORMAT_INFO srcFormatInfo = m_parent->LookupFormat(srcDesc.Format, DXGI_VK_FORMAT_MODE_ANY);

    auto dstVulkanFormatInfo = lookupFormatInfo(dstFormatInfo.Format);
    auto srcVulkanFormatInfo = lookupFormatInfo(srcFormatInfo.Format);

    if (DstSubresource >= dstTextureInfo->CountSubresources()
     || SrcSubresource >= srcTextureInfo->CountSubresources())
      return;

    const VkImageSubresource dstSubresource =
      dstTextureInfo->GetSubresourceFromIndex(
        dstVulkanFormatInfo->aspectMask, DstSubresource);

    const VkImageSubresource srcSubresource =
      srcTextureInfo->GetSubresourceFromIndex(
        srcVulkanFormatInfo->aspectMask, SrcSubresource);

    // Exactly one array layer is addressed on each side
    const VkImageSubresourceLayers dstSubresourceLayers = {
      dstSubresource.aspectMask,
      dstSubresource.mipLevel,
      dstSubresource.arrayLayer, 1 };

    const VkImageSubresourceLayers srcSubresourceLayers = {
      srcSubresource.aspectMask,
      srcSubresource.mipLevel,
      srcSubresource.arrayLayer, 1 };

    if (srcDesc.SampleDesc.Count == 1 || m_parent->GetOptions()->disableMsaa) {
      // Source is single-sampled (or MSAA is disabled through the
      // device options), so a plain image copy is emitted instead.
      EmitCs([
        cDstImage = dstTextureInfo->GetImage(),
        cSrcImage = srcTextureInfo->GetImage(),
        cDstLayers = dstSubresourceLayers,
        cSrcLayers = srcSubresourceLayers
      ] (DxvkContext* ctx) {
        ctx->copyImage(
          cDstImage, cDstLayers, VkOffset3D { 0, 0, 0 },
          cSrcImage, cSrcLayers, VkOffset3D { 0, 0, 0 },
          cDstImage->mipLevelExtent(cDstLayers.mipLevel));
      });
    } else {
      // Resolve using the Vulkan format looked up for the Format argument
      const VkFormat format = m_parent->LookupFormat(
        Format, DXGI_VK_FORMAT_MODE_ANY).Format;

      EmitCs([
        cDstImage = dstTextureInfo->GetImage(),
        cSrcImage = srcTextureInfo->GetImage(),
        cDstSubres = dstSubresourceLayers,
        cSrcSubres = srcSubresourceLayers,
        cFormat = format
      ] (DxvkContext* ctx) {
        VkImageResolve region;
        region.srcSubresource = cSrcSubres;
        region.srcOffset = VkOffset3D { 0, 0, 0 };
        region.dstSubresource = cDstSubres;
        region.dstOffset = VkOffset3D { 0, 0, 0 };
        region.extent = cDstImage->mipLevelExtent(cDstSubres.mipLevel);

        ctx->resolveImage(cDstImage, cSrcImage, region, cFormat);
      });
    }

    // Let the typed context track the write if the texture asks for it
    if (dstTextureInfo->HasSequenceNumber())
      GetTypedContext()->TrackTextureSequenceNumber(dstTextureInfo, DstSubresource);
  }


  // Forwards to UpdateResource with copy flags set to 0.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::UpdateSubresource(
    ID3D11Resource* pDstResource,
    UINT DstSubresource,
    const D3D11_BOX* pDstBox,
    const void* pSrcData,
    UINT SrcRowPitch,
    UINT SrcDepthPitch) {
    UpdateResource(pDstResource, DstSubresource, pDstBox,
      pSrcData, SrcRowPitch, SrcDepthPitch, 0);
  }


  // Forwards to UpdateResource, passing CopyFlags through.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::UpdateSubresource1(
    ID3D11Resource* pDstResource,
    UINT DstSubresource,
    const D3D11_BOX* pDstBox,
    const void* pSrcData,
    UINT SrcRowPitch,
    UINT SrcDepthPitch,
    UINT CopyFlags) {
    UpdateResource(pDstResource, DstSubresource, pDstBox,
      pSrcData, SrcRowPitch, SrcDepthPitch, CopyFlags);
  }


  // Emits an indirect transform feedback draw that takes its arguments from
  // the stream output counter of the buffer bound to vertex buffer slot 0.
  // Ignored if no buffer is bound there or it has no defined SO counter.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DrawAuto() {
    D3D10DeviceLock lock = LockContext();

    D3D11Buffer* buffer = m_state.ia.vertexBuffers[0].buffer.ptr();

    if (!buffer)
      return;

    DxvkBufferSlice vtxBuf = buffer->GetBufferSlice();
    DxvkBufferSlice ctrBuf = buffer->GetSOCounter();

    if (!ctrBuf.defined())
      return;

    EmitCs([=] (DxvkContext* ctx) {
      ctx->drawIndirectXfb(ctrBuf,
        vtxBuf.buffer()->getXfbVertexStride(),
        vtxBuf.offset());
    });
  }


  // Emits a non-indexed draw with one instance and first instance 0.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::Draw(
    UINT VertexCount,
    UINT StartVertexLocation) {
    D3D10DeviceLock lock = LockContext();

    EmitCs([=] (DxvkContext* ctx) {
      ctx->draw(
        VertexCount, 1,
        StartVertexLocation, 0);
    });
  }


  // Emits an indexed draw with one instance and first instance 0.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DrawIndexed(
    UINT IndexCount,
    UINT StartIndexLocation,
    INT BaseVertexLocation) {
    D3D10DeviceLock lock = LockContext();

    EmitCs([=] (DxvkContext* ctx) {
      ctx->drawIndexed(
        IndexCount, 1,
        StartIndexLocation,
        BaseVertexLocation, 0);
    });
  }


  // Emits an instanced, non-indexed draw.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DrawInstanced(
    UINT VertexCountPerInstance,
    UINT InstanceCount,
    UINT StartVertexLocation,
    UINT StartInstanceLocation) {
    D3D10DeviceLock lock = LockContext();

    EmitCs([=] (DxvkContext* ctx) {
      ctx->draw(
        VertexCountPerInstance,
        InstanceCount,
        StartVertexLocation,
        StartInstanceLocation);
    });
  }


  // Emits an instanced, indexed draw.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DrawIndexedInstanced(
    UINT IndexCountPerInstance,
    UINT InstanceCount,
    UINT StartIndexLocation,
    INT BaseVertexLocation,
    UINT StartInstanceLocation) {
    D3D10DeviceLock lock = LockContext();

    EmitCs([=] (DxvkContext* ctx) {
      ctx->drawIndexed(
        IndexCountPerInstance,
        InstanceCount,
        StartIndexLocation,
        BaseVertexLocation,
        StartInstanceLocation);
    });
  }


  // Emits an indexed indirect draw that reads its arguments from
  // pBufferForArgs at AlignedByteOffsetForArgs. Ignored if the argument
  // buffer fails the size validation.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DrawIndexedInstancedIndirect(
    ID3D11Buffer* pBufferForArgs,
    UINT AlignedByteOffsetForArgs) {
    D3D10DeviceLock lock = LockContext();
    SetDrawBuffers(pBufferForArgs, nullptr);

    if (!ValidateDrawBufferSize(pBufferForArgs, AlignedByteOffsetForArgs, sizeof(VkDrawIndexedIndirectCommand)))
      return;

    // If possible, batch up multiple indirect draw calls of
    // the same type into one single multiDrawIndirect call
    auto cmdData = static_cast(m_cmdData);
    auto stride = 0u;

    if (cmdData && cmdData->type == D3D11CmdType::DrawIndirectIndexed)
      stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndexedIndirectCommand));

    if (stride) {
      // A non-zero stride means this draw extends the pending command
      cmdData->count += 1;
      cmdData->stride = stride;
    } else {
      // Otherwise start a new command with a single draw
      cmdData = EmitCsCmd(
        [] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
          ctx->drawIndexedIndirect(data->offset, data->count, data->stride);
        });

      cmdData->type = D3D11CmdType::DrawIndirectIndexed;
      cmdData->offset = AlignedByteOffsetForArgs;
      cmdData->count = 1;
      cmdData->stride = 0;
    }
  }


  // Emits a non-indexed indirect draw that reads its arguments from
  // pBufferForArgs at AlignedByteOffsetForArgs. Ignored if the argument
  // buffer fails the size validation.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DrawInstancedIndirect(
    ID3D11Buffer* pBufferForArgs,
    UINT AlignedByteOffsetForArgs) {
    D3D10DeviceLock lock = LockContext();
    SetDrawBuffers(pBufferForArgs, nullptr);

    if (!ValidateDrawBufferSize(pBufferForArgs, AlignedByteOffsetForArgs, sizeof(VkDrawIndirectCommand)))
      return;

    // If possible, batch up multiple indirect draw calls of
    // the same type into one single multiDrawIndirect call
    auto cmdData = static_cast(m_cmdData);
    auto stride = 0u;

    if (cmdData && cmdData->type == D3D11CmdType::DrawIndirect)
      stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndirectCommand));

    if (stride) {
      // A non-zero stride means this draw extends the pending command
      cmdData->count += 1;
      cmdData->stride = stride;
    } else {
      // Otherwise start a new command with a single draw
      cmdData = EmitCsCmd(
        [] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
          ctx->drawIndirect(data->offset, data->count, data->stride);
        });

      cmdData->type = D3D11CmdType::DrawIndirect;
      cmdData->offset = AlignedByteOffsetForArgs;
      cmdData->count = 1;
      cmdData->stride = 0;
    }
  }


  // The declaration started here continues on the following source line.
  template
  void 
STDMETHODCALLTYPE D3D11CommonContext::Dispatch( UINT ThreadGroupCountX, UINT ThreadGroupCountY, UINT ThreadGroupCountZ) { D3D10DeviceLock lock = LockContext(); EmitCs([=] (DxvkContext* ctx) { ctx->dispatch( ThreadGroupCountX, ThreadGroupCountY, ThreadGroupCountZ); }); } template void STDMETHODCALLTYPE D3D11CommonContext::DispatchIndirect( ID3D11Buffer* pBufferForArgs, UINT AlignedByteOffsetForArgs) { D3D10DeviceLock lock = LockContext(); SetDrawBuffers(pBufferForArgs, nullptr); if (!ValidateDrawBufferSize(pBufferForArgs, AlignedByteOffsetForArgs, sizeof(VkDispatchIndirectCommand))) return; EmitCs([cOffset = AlignedByteOffsetForArgs] (DxvkContext* ctx) { ctx->dispatchIndirect(cOffset); }); } template void STDMETHODCALLTYPE D3D11CommonContext::IASetInputLayout(ID3D11InputLayout* pInputLayout) { D3D10DeviceLock lock = LockContext(); auto inputLayout = static_cast(pInputLayout); if (m_state.ia.inputLayout != inputLayout) { bool equal = false; // Some games (e.g. Grim Dawn) create lots and lots of // identical input layouts, so we'll only apply the state // if the input layouts has actually changed between calls. 
if (m_state.ia.inputLayout != nullptr && inputLayout != nullptr) equal = m_state.ia.inputLayout->Compare(inputLayout); m_state.ia.inputLayout = inputLayout; if (!equal) ApplyInputLayout(); } } template void STDMETHODCALLTYPE D3D11CommonContext::IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY Topology) { D3D10DeviceLock lock = LockContext(); if (m_state.ia.primitiveTopology != Topology) { m_state.ia.primitiveTopology = Topology; ApplyPrimitiveTopology(); } } template void STDMETHODCALLTYPE D3D11CommonContext::IASetVertexBuffers( UINT StartSlot, UINT NumBuffers, ID3D11Buffer* const* ppVertexBuffers, const UINT* pStrides, const UINT* pOffsets) { D3D10DeviceLock lock = LockContext(); for (uint32_t i = 0; i < NumBuffers; i++) { auto newBuffer = static_cast(ppVertexBuffers[i]); if (m_state.ia.vertexBuffers[StartSlot + i].buffer != newBuffer) { m_state.ia.vertexBuffers[StartSlot + i].buffer = newBuffer; m_state.ia.vertexBuffers[StartSlot + i].offset = pOffsets[i]; m_state.ia.vertexBuffers[StartSlot + i].stride = pStrides[i]; BindVertexBuffer(StartSlot + i, newBuffer, pOffsets[i], pStrides[i]); } else if (m_state.ia.vertexBuffers[StartSlot + i].offset != pOffsets[i] || m_state.ia.vertexBuffers[StartSlot + i].stride != pStrides[i]) { m_state.ia.vertexBuffers[StartSlot + i].offset = pOffsets[i]; m_state.ia.vertexBuffers[StartSlot + i].stride = pStrides[i]; BindVertexBufferRange(StartSlot + i, newBuffer, pOffsets[i], pStrides[i]); } } m_state.ia.maxVbCount = std::clamp(StartSlot + NumBuffers, m_state.ia.maxVbCount, uint32_t(m_state.ia.vertexBuffers.size())); } template void STDMETHODCALLTYPE D3D11CommonContext::IASetIndexBuffer( ID3D11Buffer* pIndexBuffer, DXGI_FORMAT Format, UINT Offset) { D3D10DeviceLock lock = LockContext(); auto newBuffer = static_cast(pIndexBuffer); if (m_state.ia.indexBuffer.buffer != newBuffer) { m_state.ia.indexBuffer.buffer = newBuffer; m_state.ia.indexBuffer.offset = Offset; m_state.ia.indexBuffer.format = Format; BindIndexBuffer(newBuffer, Offset, 
Format); } else if (m_state.ia.indexBuffer.offset != Offset || m_state.ia.indexBuffer.format != Format) { m_state.ia.indexBuffer.offset = Offset; m_state.ia.indexBuffer.format = Format; BindIndexBufferRange(newBuffer, Offset, Format); } } template void STDMETHODCALLTYPE D3D11CommonContext::IAGetInputLayout(ID3D11InputLayout** ppInputLayout) { D3D10DeviceLock lock = LockContext(); *ppInputLayout = m_state.ia.inputLayout.ref(); } template void STDMETHODCALLTYPE D3D11CommonContext::IAGetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY* pTopology) { D3D10DeviceLock lock = LockContext(); *pTopology = m_state.ia.primitiveTopology; } template void STDMETHODCALLTYPE D3D11CommonContext::IAGetVertexBuffers( UINT StartSlot, UINT NumBuffers, ID3D11Buffer** ppVertexBuffers, UINT* pStrides, UINT* pOffsets) { D3D10DeviceLock lock = LockContext(); for (uint32_t i = 0; i < NumBuffers; i++) { const bool inRange = StartSlot + i < m_state.ia.vertexBuffers.size(); if (ppVertexBuffers) { ppVertexBuffers[i] = inRange ? m_state.ia.vertexBuffers[StartSlot + i].buffer.ref() : nullptr; } if (pStrides) { pStrides[i] = inRange ? m_state.ia.vertexBuffers[StartSlot + i].stride : 0u; } if (pOffsets) { pOffsets[i] = inRange ? 
m_state.ia.vertexBuffers[StartSlot + i].offset : 0u;
      }
    }
  }


  // Reads back the index buffer binding. Every output pointer
  // is optional and skipped when null.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::IAGetIndexBuffer(
          ID3D11Buffer** ppIndexBuffer,
          DXGI_FORMAT* pFormat,
          UINT* pOffset) {
    D3D10DeviceLock lock = LockContext();

    if (ppIndexBuffer)
      *ppIndexBuffer = m_state.ia.indexBuffer.buffer.ref();

    if (pFormat)
      *pFormat = m_state.ia.indexBuffer.format;

    if (pOffset)
      *pOffset = m_state.ia.indexBuffer.offset;
  }


  // Stores the vertex shader and calls BindShader only if the shader
  // differs from the stored one. A non-zero NumClassInstances only
  // produces an error message; ppClassInstances is never read.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::VSSetShader(
          ID3D11VertexShader* pVertexShader,
          ID3D11ClassInstance* const* ppClassInstances,
          UINT NumClassInstances) {
    D3D10DeviceLock lock = LockContext();

    auto shader = static_cast(pVertexShader);

    if (NumClassInstances)
      Logger::err("D3D11: Class instances not supported");

    if (m_state.vs != shader) {
      m_state.vs = shader;

      BindShader(GetCommonShader(shader));
    }
  }


  // Takes the context lock and forwards the slot range to SetConstantBuffers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::VSSetConstantBuffers(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer* const* ppConstantBuffers) {
    D3D10DeviceLock lock = LockContext();

    SetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers);
  }


  // Takes the context lock and forwards the slot range together with
  // the per-buffer constant ranges to SetConstantBuffers1.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::VSSetConstantBuffers1(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer* const* ppConstantBuffers,
          const UINT* pFirstConstant,
          const UINT* pNumConstants) {
    D3D10DeviceLock lock = LockContext();

    SetConstantBuffers1(
      StartSlot, NumBuffers, ppConstantBuffers,
      pFirstConstant, pNumConstants);
  }


  // Takes the context lock and forwards the view range to SetShaderResources.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::VSSetShaderResources(
          UINT StartSlot,
          UINT NumViews,
          ID3D11ShaderResourceView* const* ppShaderResourceViews) {
    D3D10DeviceLock lock = LockContext();

    SetShaderResources(
      StartSlot, NumViews, ppShaderResourceViews);
  }


  // Takes the context lock and forwards the sampler range to SetSamplers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::VSSetSamplers(
          UINT StartSlot,
          UINT NumSamplers,
          ID3D11SamplerState* const* ppSamplers) {
    D3D10DeviceLock lock = LockContext();

    SetSamplers(
      StartSlot, NumSamplers, ppSamplers);
  }


  // Hands out the stored vertex shader via ref() and reports zero
  // class instances. Both outputs are optional.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::VSGetShader(
ID3D11VertexShader** ppVertexShader,
          ID3D11ClassInstance** ppClassInstances,
          UINT* pNumClassInstances) {
    D3D10DeviceLock lock = LockContext();

    if (ppVertexShader)
      *ppVertexShader = m_state.vs.ref();

    // ppClassInstances is never written; the reported count is always 0
    if (pNumClassInstances)
      *pNumClassInstances = 0;
  }


  // Queries bound constant buffers through GetConstantBuffers.
  // Both constant-range outputs are passed as nullptr.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::VSGetConstantBuffers(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer** ppConstantBuffers) {
    D3D10DeviceLock lock = LockContext();

    GetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers,
      nullptr, nullptr);
  }


  // Queries bound constant buffers and their constant ranges
  // through GetConstantBuffers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::VSGetConstantBuffers1(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer** ppConstantBuffers,
          UINT* pFirstConstant,
          UINT* pNumConstants) {
    D3D10DeviceLock lock = LockContext();

    GetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers,
      pFirstConstant, pNumConstants);
  }


  // Takes the context lock and forwards the query to GetShaderResources.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::VSGetShaderResources(
          UINT StartSlot,
          UINT NumViews,
          ID3D11ShaderResourceView** ppShaderResourceViews) {
    D3D10DeviceLock lock = LockContext();

    GetShaderResources(
      StartSlot, NumViews, ppShaderResourceViews);
  }


  // Takes the context lock and forwards the query to GetSamplers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::VSGetSamplers(
          UINT StartSlot,
          UINT NumSamplers,
          ID3D11SamplerState** ppSamplers) {
    D3D10DeviceLock lock = LockContext();

    GetSamplers(
      StartSlot, NumSamplers, ppSamplers);
  }


  // Stores the hull shader and calls BindShader only if the shader
  // differs from the stored one. A non-zero NumClassInstances only
  // produces an error message; ppClassInstances is never read.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::HSSetShader(
          ID3D11HullShader* pHullShader,
          ID3D11ClassInstance* const* ppClassInstances,
          UINT NumClassInstances) {
    D3D10DeviceLock lock = LockContext();

    auto shader = static_cast(pHullShader);

    if (NumClassInstances)
      Logger::err("D3D11: Class instances not supported");

    if (m_state.hs != shader) {
      m_state.hs = shader;

      BindShader(GetCommonShader(shader));
    }
  }


  // Takes the context lock and forwards the slot range to SetConstantBuffers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::HSSetConstantBuffers(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer* const* ppConstantBuffers) {
    D3D10DeviceLock lock = LockContext();

    SetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers);
  }


  // Takes the context lock and forwards the slot range together with
  // the per-buffer constant ranges to SetConstantBuffers1.
  template
void STDMETHODCALLTYPE D3D11CommonContext::HSSetConstantBuffers1(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer* const* ppConstantBuffers,
          const UINT* pFirstConstant,
          const UINT* pNumConstants) {
    D3D10DeviceLock lock = LockContext();

    SetConstantBuffers1(
      StartSlot, NumBuffers, ppConstantBuffers,
      pFirstConstant, pNumConstants);
  }


  // Takes the context lock and forwards the view range to SetShaderResources.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::HSSetShaderResources(
          UINT StartSlot,
          UINT NumViews,
          ID3D11ShaderResourceView* const* ppShaderResourceViews) {
    D3D10DeviceLock lock = LockContext();

    SetShaderResources(
      StartSlot, NumViews, ppShaderResourceViews);
  }


  // Takes the context lock and forwards the sampler range to SetSamplers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::HSSetSamplers(
          UINT StartSlot,
          UINT NumSamplers,
          ID3D11SamplerState* const* ppSamplers) {
    D3D10DeviceLock lock = LockContext();

    SetSamplers(
      StartSlot, NumSamplers, ppSamplers);
  }


  // Hands out the stored hull shader via ref() and reports zero
  // class instances. Both outputs are optional; ppClassInstances
  // is never written.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::HSGetShader(
          ID3D11HullShader** ppHullShader,
          ID3D11ClassInstance** ppClassInstances,
          UINT* pNumClassInstances) {
    D3D10DeviceLock lock = LockContext();

    if (ppHullShader)
      *ppHullShader = m_state.hs.ref();

    if (pNumClassInstances)
      *pNumClassInstances = 0;
  }


  // Queries bound constant buffers through GetConstantBuffers.
  // Both constant-range outputs are passed as nullptr.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::HSGetConstantBuffers(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer** ppConstantBuffers) {
    D3D10DeviceLock lock = LockContext();

    GetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers,
      nullptr, nullptr);
  }


  // Queries bound constant buffers and their constant ranges
  // through GetConstantBuffers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::HSGetConstantBuffers1(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer** ppConstantBuffers,
          UINT* pFirstConstant,
          UINT* pNumConstants) {
    D3D10DeviceLock lock = LockContext();

    GetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers,
      pFirstConstant, pNumConstants);
  }


  // Takes the context lock and forwards the query to GetShaderResources.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::HSGetShaderResources(
          UINT StartSlot,
          UINT NumViews,
          ID3D11ShaderResourceView** ppShaderResourceViews) {
    D3D10DeviceLock lock = LockContext();

    GetShaderResources(
      StartSlot, NumViews, ppShaderResourceViews);
  }


  // Takes the context lock and forwards the query to GetSamplers.
  template
  void
STDMETHODCALLTYPE D3D11CommonContext::HSGetSamplers(
          UINT StartSlot,
          UINT NumSamplers,
          ID3D11SamplerState** ppSamplers) {
    D3D10DeviceLock lock = LockContext();

    GetSamplers(
      StartSlot, NumSamplers, ppSamplers);
  }


  // Stores the domain shader and calls BindShader only if the shader
  // differs from the stored one. A non-zero NumClassInstances only
  // produces an error message; ppClassInstances is never read.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DSSetShader(
          ID3D11DomainShader* pDomainShader,
          ID3D11ClassInstance* const* ppClassInstances,
          UINT NumClassInstances) {
    D3D10DeviceLock lock = LockContext();

    auto shader = static_cast(pDomainShader);

    if (NumClassInstances)
      Logger::err("D3D11: Class instances not supported");

    if (m_state.ds != shader) {
      m_state.ds = shader;

      BindShader(GetCommonShader(shader));
    }
  }


  // Takes the context lock and forwards the slot range to SetConstantBuffers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DSSetConstantBuffers(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer* const* ppConstantBuffers) {
    D3D10DeviceLock lock = LockContext();

    SetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers);
  }


  // Takes the context lock and forwards the slot range together with
  // the per-buffer constant ranges to SetConstantBuffers1.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DSSetConstantBuffers1(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer* const* ppConstantBuffers,
          const UINT* pFirstConstant,
          const UINT* pNumConstants) {
    D3D10DeviceLock lock = LockContext();

    SetConstantBuffers1(
      StartSlot, NumBuffers, ppConstantBuffers,
      pFirstConstant, pNumConstants);
  }


  // Takes the context lock and forwards the view range to SetShaderResources.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DSSetShaderResources(
          UINT StartSlot,
          UINT NumViews,
          ID3D11ShaderResourceView* const* ppShaderResourceViews) {
    D3D10DeviceLock lock = LockContext();

    SetShaderResources(
      StartSlot, NumViews, ppShaderResourceViews);
  }


  // Takes the context lock and forwards the sampler range to SetSamplers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DSSetSamplers(
          UINT StartSlot,
          UINT NumSamplers,
          ID3D11SamplerState* const* ppSamplers) {
    D3D10DeviceLock lock = LockContext();

    SetSamplers(
      StartSlot, NumSamplers, ppSamplers);
  }


  // Hands out the stored domain shader via ref() and reports zero
  // class instances. Both outputs are optional; ppClassInstances
  // is never written.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DSGetShader(
          ID3D11DomainShader** ppDomainShader,
          ID3D11ClassInstance** ppClassInstances,
          UINT* pNumClassInstances) {
    D3D10DeviceLock lock = LockContext();

    if (ppDomainShader)
      *ppDomainShader = m_state.ds.ref();

    if
(pNumClassInstances)
      *pNumClassInstances = 0;
  }


  // Queries bound constant buffers through GetConstantBuffers.
  // Both constant-range outputs are passed as nullptr.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DSGetConstantBuffers(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer** ppConstantBuffers) {
    D3D10DeviceLock lock = LockContext();

    GetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers,
      nullptr, nullptr);
  }


  // Queries bound constant buffers and their constant ranges
  // through GetConstantBuffers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DSGetConstantBuffers1(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer** ppConstantBuffers,
          UINT* pFirstConstant,
          UINT* pNumConstants) {
    D3D10DeviceLock lock = LockContext();

    GetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers,
      pFirstConstant, pNumConstants);
  }


  // Takes the context lock and forwards the query to GetShaderResources.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DSGetShaderResources(
          UINT StartSlot,
          UINT NumViews,
          ID3D11ShaderResourceView** ppShaderResourceViews) {
    D3D10DeviceLock lock = LockContext();

    GetShaderResources(
      StartSlot, NumViews, ppShaderResourceViews);
  }


  // Takes the context lock and forwards the query to GetSamplers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::DSGetSamplers(
          UINT StartSlot,
          UINT NumSamplers,
          ID3D11SamplerState** ppSamplers) {
    D3D10DeviceLock lock = LockContext();

    GetSamplers(
      StartSlot, NumSamplers, ppSamplers);
  }


  // Stores the geometry shader and calls BindShader only if the shader
  // differs from the stored one. A non-zero NumClassInstances only
  // produces an error message; ppClassInstances is never read.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::GSSetShader(
          ID3D11GeometryShader* pShader,
          ID3D11ClassInstance* const* ppClassInstances,
          UINT NumClassInstances) {
    D3D10DeviceLock lock = LockContext();

    auto shader = static_cast(pShader);

    if (NumClassInstances)
      Logger::err("D3D11: Class instances not supported");

    if (m_state.gs != shader) {
      m_state.gs = shader;

      BindShader(GetCommonShader(shader));
    }
  }


  // Takes the context lock and forwards the slot range to SetConstantBuffers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::GSSetConstantBuffers(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer* const* ppConstantBuffers) {
    D3D10DeviceLock lock = LockContext();

    SetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers);
  }


  // Takes the context lock and forwards the slot range together with
  // the per-buffer constant ranges to SetConstantBuffers1.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::GSSetConstantBuffers1(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer* const* ppConstantBuffers,
          const UINT* pFirstConstant,
          const UINT* pNumConstants) {
D3D10DeviceLock lock = LockContext();

    SetConstantBuffers1(
      StartSlot, NumBuffers, ppConstantBuffers,
      pFirstConstant, pNumConstants);
  }


  // Takes the context lock and forwards the view range to SetShaderResources.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::GSSetShaderResources(
          UINT StartSlot,
          UINT NumViews,
          ID3D11ShaderResourceView* const* ppShaderResourceViews) {
    D3D10DeviceLock lock = LockContext();

    SetShaderResources(
      StartSlot, NumViews, ppShaderResourceViews);
  }


  // Takes the context lock and forwards the sampler range to SetSamplers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::GSSetSamplers(
          UINT StartSlot,
          UINT NumSamplers,
          ID3D11SamplerState* const* ppSamplers) {
    D3D10DeviceLock lock = LockContext();

    SetSamplers(
      StartSlot, NumSamplers, ppSamplers);
  }


  // Hands out the stored geometry shader via ref() and reports zero
  // class instances. Both outputs are optional; ppClassInstances
  // is never written.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::GSGetShader(
          ID3D11GeometryShader** ppGeometryShader,
          ID3D11ClassInstance** ppClassInstances,
          UINT* pNumClassInstances) {
    D3D10DeviceLock lock = LockContext();

    if (ppGeometryShader)
      *ppGeometryShader = m_state.gs.ref();

    if (pNumClassInstances)
      *pNumClassInstances = 0;
  }


  // Queries bound constant buffers through GetConstantBuffers.
  // Both constant-range outputs are passed as nullptr.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::GSGetConstantBuffers(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer** ppConstantBuffers) {
    D3D10DeviceLock lock = LockContext();

    GetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers,
      nullptr, nullptr);
  }


  // Queries bound constant buffers and their constant ranges
  // through GetConstantBuffers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::GSGetConstantBuffers1(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer** ppConstantBuffers,
          UINT* pFirstConstant,
          UINT* pNumConstants) {
    D3D10DeviceLock lock = LockContext();

    GetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers,
      pFirstConstant, pNumConstants);
  }


  // Takes the context lock and forwards the query to GetShaderResources.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::GSGetShaderResources(
          UINT StartSlot,
          UINT NumViews,
          ID3D11ShaderResourceView** ppShaderResourceViews) {
    D3D10DeviceLock lock = LockContext();

    GetShaderResources(
      StartSlot, NumViews, ppShaderResourceViews);
  }


  // Takes the context lock and forwards the query to GetSamplers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::GSGetSamplers(
          UINT StartSlot,
          UINT NumSamplers,
          ID3D11SamplerState** ppSamplers) {
    D3D10DeviceLock lock = LockContext();

    GetSamplers(
StartSlot, NumSamplers, ppSamplers);
  }


  // Stores the pixel shader and calls BindShader only if the shader
  // differs from the stored one. A non-zero NumClassInstances only
  // produces an error message; ppClassInstances is never read.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::PSSetShader(
          ID3D11PixelShader* pPixelShader,
          ID3D11ClassInstance* const* ppClassInstances,
          UINT NumClassInstances) {
    D3D10DeviceLock lock = LockContext();

    auto shader = static_cast(pPixelShader);

    if (NumClassInstances)
      Logger::err("D3D11: Class instances not supported");

    if (m_state.ps != shader) {
      m_state.ps = shader;

      BindShader(GetCommonShader(shader));
    }
  }


  // Takes the context lock and forwards the slot range to SetConstantBuffers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::PSSetConstantBuffers(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer* const* ppConstantBuffers) {
    D3D10DeviceLock lock = LockContext();

    SetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers);
  }


  // Takes the context lock and forwards the slot range together with
  // the per-buffer constant ranges to SetConstantBuffers1.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::PSSetConstantBuffers1(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer* const* ppConstantBuffers,
          const UINT* pFirstConstant,
          const UINT* pNumConstants) {
    D3D10DeviceLock lock = LockContext();

    SetConstantBuffers1(
      StartSlot, NumBuffers, ppConstantBuffers,
      pFirstConstant, pNumConstants);
  }


  // Takes the context lock and forwards the view range to SetShaderResources.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::PSSetShaderResources(
          UINT StartSlot,
          UINT NumViews,
          ID3D11ShaderResourceView* const* ppShaderResourceViews) {
    D3D10DeviceLock lock = LockContext();

    SetShaderResources(
      StartSlot, NumViews, ppShaderResourceViews);
  }


  // Takes the context lock and forwards the sampler range to SetSamplers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::PSSetSamplers(
          UINT StartSlot,
          UINT NumSamplers,
          ID3D11SamplerState* const* ppSamplers) {
    D3D10DeviceLock lock = LockContext();

    SetSamplers(
      StartSlot, NumSamplers, ppSamplers);
  }


  // Hands out the stored pixel shader via ref() and reports zero
  // class instances. Both outputs are optional; ppClassInstances
  // is never written.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::PSGetShader(
          ID3D11PixelShader** ppPixelShader,
          ID3D11ClassInstance** ppClassInstances,
          UINT* pNumClassInstances) {
    D3D10DeviceLock lock = LockContext();

    if (ppPixelShader)
      *ppPixelShader = m_state.ps.ref();

    if (pNumClassInstances)
      *pNumClassInstances = 0;
  }


  // Queries bound constant buffers through GetConstantBuffers.
  // Both constant-range outputs are passed as nullptr.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::PSGetConstantBuffers(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer** ppConstantBuffers) {
D3D10DeviceLock lock = LockContext();

    GetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers,
      nullptr, nullptr);
  }


  // Queries bound constant buffers and their constant ranges
  // through GetConstantBuffers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::PSGetConstantBuffers1(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer** ppConstantBuffers,
          UINT* pFirstConstant,
          UINT* pNumConstants) {
    D3D10DeviceLock lock = LockContext();

    GetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers,
      pFirstConstant, pNumConstants);
  }


  // Takes the context lock and forwards the query to GetShaderResources.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::PSGetShaderResources(
          UINT StartSlot,
          UINT NumViews,
          ID3D11ShaderResourceView** ppShaderResourceViews) {
    D3D10DeviceLock lock = LockContext();

    GetShaderResources(
      StartSlot, NumViews, ppShaderResourceViews);
  }


  // Takes the context lock and forwards the query to GetSamplers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::PSGetSamplers(
          UINT StartSlot,
          UINT NumSamplers,
          ID3D11SamplerState** ppSamplers) {
    D3D10DeviceLock lock = LockContext();

    GetSamplers(
      StartSlot, NumSamplers, ppSamplers);
  }


  // Stores the compute shader and calls BindShader only if the shader
  // differs from the stored one. A non-zero NumClassInstances only
  // produces an error message; ppClassInstances is never read.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::CSSetShader(
          ID3D11ComputeShader* pComputeShader,
          ID3D11ClassInstance* const* ppClassInstances,
          UINT NumClassInstances) {
    D3D10DeviceLock lock = LockContext();

    auto shader = static_cast(pComputeShader);

    if (NumClassInstances)
      Logger::err("D3D11: Class instances not supported");

    if (m_state.cs != shader) {
      m_state.cs = shader;

      BindShader(GetCommonShader(shader));
    }
  }


  // Takes the context lock and forwards the slot range to SetConstantBuffers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::CSSetConstantBuffers(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer* const* ppConstantBuffers) {
    D3D10DeviceLock lock = LockContext();

    SetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers);
  }


  // Takes the context lock and forwards the slot range together with
  // the per-buffer constant ranges to SetConstantBuffers1.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::CSSetConstantBuffers1(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer* const* ppConstantBuffers,
          const UINT* pFirstConstant,
          const UINT* pNumConstants) {
    D3D10DeviceLock lock = LockContext();

    SetConstantBuffers1(
      StartSlot, NumBuffers, ppConstantBuffers,
      pFirstConstant, pNumConstants);
  }


  // Takes the context lock and forwards the view range to SetShaderResources.
  template
  void STDMETHODCALLTYPE
D3D11CommonContext::CSSetShaderResources(
          UINT StartSlot,
          UINT NumViews,
          ID3D11ShaderResourceView* const* ppShaderResourceViews) {
    D3D10DeviceLock lock = LockContext();

    SetShaderResources(
      StartSlot, NumViews, ppShaderResourceViews);
  }


  // Takes the context lock and forwards the sampler range to SetSamplers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::CSSetSamplers(
          UINT StartSlot,
          UINT NumSamplers,
          ID3D11SamplerState* const* ppSamplers) {
    D3D10DeviceLock lock = LockContext();

    SetSamplers(
      StartSlot, NumSamplers, ppSamplers);
  }


  // Binds NumUAVs compute UAVs starting at StartSlot. The call is
  // dropped entirely when TestRtvUavHazards reports a hazard. UAVs
  // bound outside the updated slot range that overlap one of the new
  // views are unbound first, then the new views are bound.
  // NOTE(review): StartSlot + i is not range-checked against
  // m_state.uav.views here - presumably validated by callers; confirm.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::CSSetUnorderedAccessViews(
          UINT StartSlot,
          UINT NumUAVs,
          ID3D11UnorderedAccessView* const* ppUnorderedAccessViews,
          const UINT* pUAVInitialCounts) {
    D3D10DeviceLock lock = LockContext();

    if (TestRtvUavHazards(0, nullptr, NumUAVs, ppUnorderedAccessViews))
      return;

    // Unbind previously bound conflicting UAVs
    uint32_t uavSlotId = computeUavBinding (DxbcProgramType::ComputeShader, 0);
    uint32_t ctrSlotId = computeUavCounterBinding(DxbcProgramType::ComputeShader, 0);

    int32_t uavId = m_state.uav.mask.findNext(0);

    // The loop ends once findNext returns a negative index
    while (uavId >= 0) {
      if (uint32_t(uavId) < StartSlot || uint32_t(uavId) >= StartSlot + NumUAVs) {
        // Bound slot outside the updated range: compare it
        // against every view that is about to be bound
        for (uint32_t i = 0; i < NumUAVs; i++) {
          auto uav = static_cast(ppUnorderedAccessViews[i]);

          if (CheckViewOverlap(uav, m_state.uav.views[uavId].ptr())) {
            m_state.uav.views[uavId] = nullptr;
            m_state.uav.mask.clr(uavId);

            BindUnorderedAccessView(
              uavSlotId + uavId, nullptr,
              ctrSlotId + uavId, ~0u);
          }
        }

        uavId = m_state.uav.mask.findNext(uavId + 1);
      } else {
        // Slots inside the updated range get overwritten
        // below, so continue the search after the range
        uavId = m_state.uav.mask.findNext(StartSlot + NumUAVs);
      }
    }

    // Actually bind the given UAVs
    for (uint32_t i = 0; i < NumUAVs; i++) {
      auto uav = static_cast(ppUnorderedAccessViews[i]);
      auto ctr = pUAVInitialCounts ?
pUAVInitialCounts[i] : ~0u;

      // Rebind when the view changed or a counter value other than ~0u was given
      if (m_state.uav.views[StartSlot + i] != uav || ctr != ~0u) {
        m_state.uav.views[StartSlot + i] = uav;
        m_state.uav.mask.set(StartSlot + i, uav != nullptr);

        BindUnorderedAccessView(
          uavSlotId + StartSlot + i, uav,
          ctrSlotId + StartSlot + i, ctr);

        ResolveCsSrvHazards(uav);
      }
    }

    // The previous count is the lower clamp bound and the array
    // size the upper one, so the tracked count never shrinks here.
    m_state.uav.maxCount = std::clamp(StartSlot + NumUAVs,
      m_state.uav.maxCount, uint32_t(m_state.uav.views.size()));
  }


  // Hands out the stored compute shader via ref() and reports zero
  // class instances. Both outputs are optional; ppClassInstances
  // is never written.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::CSGetShader(
          ID3D11ComputeShader** ppComputeShader,
          ID3D11ClassInstance** ppClassInstances,
          UINT* pNumClassInstances) {
    D3D10DeviceLock lock = LockContext();

    if (ppComputeShader)
      *ppComputeShader = m_state.cs.ref();

    if (pNumClassInstances)
      *pNumClassInstances = 0;
  }


  // Queries bound constant buffers through GetConstantBuffers.
  // Both constant-range outputs are passed as nullptr.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::CSGetConstantBuffers(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer** ppConstantBuffers) {
    D3D10DeviceLock lock = LockContext();

    GetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers,
      nullptr, nullptr);
  }


  // Queries bound constant buffers and their constant ranges
  // through GetConstantBuffers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::CSGetConstantBuffers1(
          UINT StartSlot,
          UINT NumBuffers,
          ID3D11Buffer** ppConstantBuffers,
          UINT* pFirstConstant,
          UINT* pNumConstants) {
    D3D10DeviceLock lock = LockContext();

    GetConstantBuffers(
      StartSlot, NumBuffers, ppConstantBuffers,
      pFirstConstant, pNumConstants);
  }


  // Takes the context lock and forwards the query to GetShaderResources.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::CSGetShaderResources(
          UINT StartSlot,
          UINT NumViews,
          ID3D11ShaderResourceView** ppShaderResourceViews) {
    D3D10DeviceLock lock = LockContext();

    GetShaderResources(
      StartSlot, NumViews, ppShaderResourceViews);
  }


  // Takes the context lock and forwards the query to GetSamplers.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::CSGetSamplers(
          UINT StartSlot,
          UINT NumSamplers,
          ID3D11SamplerState** ppSamplers) {
    D3D10DeviceLock lock = LockContext();

    GetSamplers(
      StartSlot, NumSamplers, ppSamplers);
  }


  // Reads back NumUAVs compute UAV bindings starting at StartSlot.
  // Slots past the end of the view array are reported as nullptr.
  // NOTE(review): ppUnorderedAccessViews is written without a null check.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::CSGetUnorderedAccessViews(
          UINT StartSlot,
          UINT NumUAVs,
          ID3D11UnorderedAccessView** ppUnorderedAccessViews) {
    D3D10DeviceLock lock =
LockContext();

    for (uint32_t i = 0; i < NumUAVs; i++) {
      ppUnorderedAccessViews[i] = StartSlot + i < m_state.uav.views.size()
        ? m_state.uav.views[StartSlot + i].ref()
        : nullptr;
    }
  }


  // Sets render targets and the depth-stencil view. On non-deferred
  // contexts FlushImplicit(true) is called first. The UAV part of the
  // shared helper is invoked with start slot NumViews and zero UAVs.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::OMSetRenderTargets(
          UINT NumViews,
          ID3D11RenderTargetView* const* ppRenderTargetViews,
          ID3D11DepthStencilView* pDepthStencilView) {
    D3D10DeviceLock lock = LockContext();

    if constexpr (!IsDeferred)
      GetTypedContext()->FlushImplicit(true);

    SetRenderTargetsAndUnorderedAccessViews(
      NumViews, ppRenderTargetViews, pDepthStencilView,
      NumViews, 0, nullptr, nullptr);
  }


  // Sets render targets, the depth-stencil view and output-merger UAVs
  // in one call by forwarding all arguments to the shared helper. On
  // non-deferred contexts FlushImplicit(true) is called first.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::OMSetRenderTargetsAndUnorderedAccessViews(
          UINT NumRTVs,
          ID3D11RenderTargetView* const* ppRenderTargetViews,
          ID3D11DepthStencilView* pDepthStencilView,
          UINT UAVStartSlot,
          UINT NumUAVs,
          ID3D11UnorderedAccessView* const* ppUnorderedAccessViews,
          const UINT* pUAVInitialCounts) {
    D3D10DeviceLock lock = LockContext();

    if constexpr (!IsDeferred)
      GetTypedContext()->FlushImplicit(true);

    SetRenderTargetsAndUnorderedAccessViews(
      NumRTVs, ppRenderTargetViews, pDepthStencilView,
      UAVStartSlot, NumUAVs, ppUnorderedAccessViews, pUAVInitialCounts);
  }


  // Stores blend state and sample mask, re-applying the blend state
  // only if either changed. A non-null BlendFactor is copied and
  // applied on every call; a null BlendFactor leaves the stored
  // factor untouched.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::OMSetBlendState(
          ID3D11BlendState* pBlendState,
          const FLOAT BlendFactor[4],
          UINT SampleMask) {
    D3D10DeviceLock lock = LockContext();

    auto blendState = static_cast(pBlendState);

    if (m_state.om.cbState != blendState
     || m_state.om.sampleMask != SampleMask) {
      m_state.om.cbState = blendState;
      m_state.om.sampleMask = SampleMask;

      ApplyBlendState();
    }

    if (BlendFactor != nullptr) {
      for (uint32_t i = 0; i < 4; i++)
        m_state.om.blendFactor[i] = BlendFactor[i];

      ApplyBlendFactor();
    }
  }


  // Stores the depth-stencil state and the stencil reference,
  // applying each one only if it changed.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::OMSetDepthStencilState(
          ID3D11DepthStencilState* pDepthStencilState,
          UINT StencilRef) {
    D3D10DeviceLock lock = LockContext();

    auto depthStencilState = static_cast(pDepthStencilState);

    if (m_state.om.dsState != depthStencilState) {
m_state.om.dsState = depthStencilState;
      ApplyDepthStencilState();
    }

    // The D3D11 runtime only appears to store the low 8 bits,
    // and some games rely on this behaviour. Do the same here.
    StencilRef &= 0xFF;

    if (m_state.om.stencilRef != StencilRef) {
      m_state.om.stencilRef = StencilRef;
      ApplyStencilRef();
    }
  }


  // Reads back render targets and the depth-stencil view by forwarding
  // to OMGetRenderTargetsAndUnorderedAccessViews with zero UAVs. No
  // lock is taken here; the forwarded call takes it.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::OMGetRenderTargets(
          UINT NumViews,
          ID3D11RenderTargetView** ppRenderTargetViews,
          ID3D11DepthStencilView** ppDepthStencilView) {
    OMGetRenderTargetsAndUnorderedAccessViews(
      NumViews, ppRenderTargetViews, ppDepthStencilView,
      NumViews, 0, nullptr);
  }


  // Reads back render targets, the depth-stencil view and output-merger
  // UAVs. Every output pointer is optional. Indices past the end of the
  // respective state array are reported as nullptr.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::OMGetRenderTargetsAndUnorderedAccessViews(
          UINT NumRTVs,
          ID3D11RenderTargetView** ppRenderTargetViews,
          ID3D11DepthStencilView** ppDepthStencilView,
          UINT UAVStartSlot,
          UINT NumUAVs,
          ID3D11UnorderedAccessView** ppUnorderedAccessViews) {
    D3D10DeviceLock lock = LockContext();

    if (ppRenderTargetViews) {
      for (UINT i = 0; i < NumRTVs; i++) {
        ppRenderTargetViews[i] = i < m_state.om.rtvs.size()
          ? m_state.om.rtvs[i].ref()
          : nullptr;
      }
    }

    if (ppDepthStencilView)
      *ppDepthStencilView = m_state.om.dsv.ref();

    if (ppUnorderedAccessViews) {
      for (UINT i = 0; i < NumUAVs; i++) {
        ppUnorderedAccessViews[i] = UAVStartSlot + i < m_state.om.uavs.size() ?
m_state.om.uavs[UAVStartSlot + i].ref() : nullptr;
      }
    }
  }


  // Reads back the blend state, blend factor (four FLOATs) and
  // sample mask. Every output pointer is optional.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::OMGetBlendState(
          ID3D11BlendState** ppBlendState,
          FLOAT BlendFactor[4],
          UINT* pSampleMask) {
    D3D10DeviceLock lock = LockContext();

    if (ppBlendState)
      *ppBlendState = ref(m_state.om.cbState);

    if (BlendFactor)
      std::memcpy(BlendFactor, m_state.om.blendFactor, sizeof(FLOAT) * 4);

    if (pSampleMask)
      *pSampleMask = m_state.om.sampleMask;
  }


  // Reads back the depth-stencil state and the stored stencil
  // reference. Both output pointers are optional.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::OMGetDepthStencilState(
          ID3D11DepthStencilState** ppDepthStencilState,
          UINT* pStencilRef) {
    D3D10DeviceLock lock = LockContext();

    if (ppDepthStencilState)
      *ppDepthStencilState = ref(m_state.om.dsState);

    if (pStencilRef)
      *pStencilRef = m_state.om.stencilRef;
  }


  // Stores the rasterizer state. When it changes, the state is applied
  // and dependent state (forced sample count, viewport/scissor state)
  // is refreshed only if the relevant descriptor field differs between
  // the old and new state. A null state counts as ForcedSampleCount 0
  // and ScissorEnable false.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::RSSetState(ID3D11RasterizerState* pRasterizerState) {
    D3D10DeviceLock lock = LockContext();

    // Keep the old state around so its descriptor can be compared below
    auto currRasterizerState = m_state.rs.state;
    auto nextRasterizerState = static_cast(pRasterizerState);

    if (m_state.rs.state != nextRasterizerState) {
      m_state.rs.state = nextRasterizerState;
      ApplyRasterizerState();

      // If necessary, update the rasterizer sample count push constant
      uint32_t currSampleCount = currRasterizerState != nullptr ? currRasterizerState->Desc()->ForcedSampleCount : 0;
      uint32_t nextSampleCount = nextRasterizerState != nullptr ? nextRasterizerState->Desc()->ForcedSampleCount : 0;

      if (currSampleCount != nextSampleCount)
        ApplyRasterizerSampleCount();

      // In D3D11, the rasterizer state defines whether the scissor test is
      // enabled, so if that changes, we need to update scissor rects as well.
      bool currScissorEnable = currRasterizerState != nullptr ? currRasterizerState->Desc()->ScissorEnable : false;
      bool nextScissorEnable = nextRasterizerState != nullptr ?
nextRasterizerState->Desc()->ScissorEnable : false;

      if (currScissorEnable != nextScissorEnable)
        ApplyViewportState();
    }
  }


  // Stores the given viewports. Calls with more viewports than the
  // state array can hold are ignored. ApplyViewportState is called
  // only if the count or any viewport field changed.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::RSSetViewports(
          UINT NumViewports,
          const D3D11_VIEWPORT* pViewports) {
    D3D10DeviceLock lock = LockContext();

    if (unlikely(NumViewports > m_state.rs.viewports.size()))
      return;

    bool dirty = m_state.rs.numViewports != NumViewports;
    m_state.rs.numViewports = NumViewports;

    for (uint32_t i = 0; i < NumViewports; i++) {
      const D3D11_VIEWPORT& vp = m_state.rs.viewports[i];

      // Compare against the stored viewport before overwriting it
      dirty |= vp.TopLeftX != pViewports[i].TopLeftX
            || vp.TopLeftY != pViewports[i].TopLeftY
            || vp.Width != pViewports[i].Width
            || vp.Height != pViewports[i].Height
            || vp.MinDepth != pViewports[i].MinDepth
            || vp.MaxDepth != pViewports[i].MaxDepth;

      m_state.rs.viewports[i] = pViewports[i];
    }

    if (dirty)
      ApplyViewportState();
  }


  // Stores the given scissor rects. Calls with more rects than the
  // state array can hold are ignored. Rects with bottom < top or
  // right < left are skipped and leave the stored rect untouched.
  // ApplyViewportState is called only if something changed and the
  // bound rasterizer state has ScissorEnable set.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::RSSetScissorRects(
          UINT NumRects,
          const D3D11_RECT* pRects) {
    D3D10DeviceLock lock = LockContext();

    if (unlikely(NumRects > m_state.rs.scissors.size()))
      return;

    bool dirty = m_state.rs.numScissors != NumRects;
    m_state.rs.numScissors = NumRects;

    for (uint32_t i = 0; i < NumRects; i++) {
      if (pRects[i].bottom >= pRects[i].top
       && pRects[i].right >= pRects[i].left) {
        const D3D11_RECT& sr = m_state.rs.scissors[i];

        dirty |= sr.top != pRects[i].top
              || sr.left != pRects[i].left
              || sr.bottom != pRects[i].bottom
              || sr.right != pRects[i].right;

        m_state.rs.scissors[i] = pRects[i];
      }
    }

    if (m_state.rs.state != nullptr && dirty) {
      D3D11_RASTERIZER_DESC rsDesc;
      m_state.rs.state->GetDesc(&rsDesc);

      if (rsDesc.ScissorEnable)
        ApplyViewportState();
    }
  }


  // Hands out the stored rasterizer state via ref(); the output is optional.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::RSGetState(ID3D11RasterizerState** ppRasterizerState) {
    D3D10DeviceLock lock = LockContext();

    if (ppRasterizerState)
      *ppRasterizerState = ref(m_state.rs.state);
  }


  // Reads back the stored viewports. With a null pViewports only the
  // stored count is written to *pNumViewports. Otherwise all
  // *pNumViewports entries are filled (zeroed past the stored count)
  // and *pNumViewports becomes min(stored count, requested count).
  template
  void STDMETHODCALLTYPE D3D11CommonContext::RSGetViewports(
          UINT* pNumViewports,
          D3D11_VIEWPORT* pViewports) {
D3D10DeviceLock lock = LockContext();
    uint32_t numWritten = m_state.rs.numViewports;

    if (pViewports) {
      numWritten = std::min(numWritten, *pNumViewports);

      // Fill the whole caller array; entries past the stored count are zeroed
      for (uint32_t i = 0; i < *pNumViewports; i++) {
        if (i < m_state.rs.numViewports) {
          pViewports[i] = m_state.rs.viewports[i];
        } else {
          pViewports[i].TopLeftX = 0.0f;
          pViewports[i].TopLeftY = 0.0f;
          pViewports[i].Width = 0.0f;
          pViewports[i].Height = 0.0f;
          pViewports[i].MinDepth = 0.0f;
          pViewports[i].MaxDepth = 0.0f;
        }
      }
    }

    *pNumViewports = numWritten;
  }


  // Reads back the stored scissor rects. With a non-null pRects all
  // *pNumRects entries are filled (zeroed past the stored count).
  // *pNumRects always receives the stored scissor count.
  // NOTE(review): numWritten is computed but never used; unlike
  // RSGetViewports the final store is not clamped to the requested
  // count - confirm whether this difference is intended.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::RSGetScissorRects(
          UINT* pNumRects,
          D3D11_RECT* pRects) {
    D3D10DeviceLock lock = LockContext();
    uint32_t numWritten = m_state.rs.numScissors;

    if (pRects) {
      numWritten = std::min(numWritten, *pNumRects);

      for (uint32_t i = 0; i < *pNumRects; i++) {
        if (i < m_state.rs.numScissors) {
          pRects[i] = m_state.rs.scissors[i];
        } else {
          pRects[i].left = 0;
          pRects[i].top = 0;
          pRects[i].right = 0;
          pRects[i].bottom = 0;
        }
      }
    }

    *pNumRects = m_state.rs.numScissors;
  }


  // Replaces all stream output targets: the first NumBuffers slots
  // take the given buffers (offset 0 when pOffsets is null), the
  // remaining slots are cleared, and every slot is then rebound.
  // NOTE(review): NumBuffers is not checked against
  // D3D11_SO_BUFFER_SLOT_COUNT before indexing m_state.so.targets -
  // presumably callers never exceed it; confirm.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::SOSetTargets(
          UINT NumBuffers,
          ID3D11Buffer* const* ppSOTargets,
          const UINT* pOffsets) {
    D3D10DeviceLock lock = LockContext();

    for (uint32_t i = 0; i < NumBuffers; i++) {
      D3D11Buffer* buffer = static_cast(ppSOTargets[i]);
      UINT offset = pOffsets != nullptr ? pOffsets[i] : 0;

      m_state.so.targets[i].buffer = buffer;
      m_state.so.targets[i].offset = offset;
    }

    for (uint32_t i = NumBuffers; i < D3D11_SO_BUFFER_SLOT_COUNT; i++) {
      m_state.so.targets[i].buffer = nullptr;
      m_state.so.targets[i].offset = 0;
    }

    for (uint32_t i = 0; i < D3D11_SO_BUFFER_SLOT_COUNT; i++) {
      BindXfbBuffer(i,
        m_state.so.targets[i].buffer.ptr(),
        m_state.so.targets[i].offset);
    }
  }


  // Reads back NumBuffers stream output targets. Indices past the
  // end of the target array are reported as nullptr.
  // NOTE(review): ppSOTargets is written without a null check.
  template
  void STDMETHODCALLTYPE D3D11CommonContext::SOGetTargets(
          UINT NumBuffers,
          ID3D11Buffer** ppSOTargets) {
    D3D10DeviceLock lock = LockContext();

    for (uint32_t i = 0; i < NumBuffers; i++) {
      ppSOTargets[i] = i < m_state.so.targets.size() ?
m_state.so.targets[i].buffer.ref() : nullptr; } } template void STDMETHODCALLTYPE D3D11CommonContext::SOGetTargetsWithOffsets( UINT NumBuffers, ID3D11Buffer** ppSOTargets, UINT* pOffsets) { D3D10DeviceLock lock = LockContext(); for (uint32_t i = 0; i < NumBuffers; i++) { const bool inRange = i < m_state.so.targets.size(); if (ppSOTargets) { ppSOTargets[i] = inRange ? m_state.so.targets[i].buffer.ref() : nullptr; } if (pOffsets) { pOffsets[i] = inRange ? m_state.so.targets[i].offset : 0u; } } } template void STDMETHODCALLTYPE D3D11CommonContext::SetPredication( ID3D11Predicate* pPredicate, BOOL PredicateValue) { D3D10DeviceLock lock = LockContext(); auto predicate = D3D11Query::FromPredicate(pPredicate); m_state.pr.predicateObject = predicate; m_state.pr.predicateValue = PredicateValue; static bool s_errorShown = false; if (pPredicate && !std::exchange(s_errorShown, true)) Logger::err("D3D11DeviceContext::SetPredication: Stub"); } template void STDMETHODCALLTYPE D3D11CommonContext::GetPredication( ID3D11Predicate** ppPredicate, BOOL* pPredicateValue) { D3D10DeviceLock lock = LockContext(); if (ppPredicate) *ppPredicate = D3D11Query::AsPredicate(m_state.pr.predicateObject.ref()); if (pPredicateValue) *pPredicateValue = m_state.pr.predicateValue; } template void STDMETHODCALLTYPE D3D11CommonContext::SetResourceMinLOD( ID3D11Resource* pResource, FLOAT MinLOD) { bool s_errorShown = false; if (std::exchange(s_errorShown, true)) Logger::err("D3D11DeviceContext::SetResourceMinLOD: Not implemented"); } template FLOAT STDMETHODCALLTYPE D3D11CommonContext::GetResourceMinLOD(ID3D11Resource* pResource) { bool s_errorShown = false; if (std::exchange(s_errorShown, true)) Logger::err("D3D11DeviceContext::GetResourceMinLOD: Not implemented"); return 0.0f; } template void STDMETHODCALLTYPE D3D11CommonContext::CopyTiles( ID3D11Resource* pTiledResource, const D3D11_TILED_RESOURCE_COORDINATE* pTileRegionStartCoordinate, const D3D11_TILE_REGION_SIZE* pTileRegionSize, ID3D11Buffer* 
pBuffer, UINT64 BufferStartOffsetInBytes, UINT Flags) { if (!pTiledResource || !pBuffer) return; auto buffer = static_cast(pBuffer); // Get buffer slice and just forward the call VkDeviceSize bufferSize = pTileRegionSize->NumTiles * SparseMemoryPageSize; if (buffer->Desc()->ByteWidth < BufferStartOffsetInBytes + bufferSize) return; DxvkBufferSlice slice = buffer->GetBufferSlice(BufferStartOffsetInBytes, bufferSize); CopyTiledResourceData(pTiledResource, pTileRegionStartCoordinate, pTileRegionSize, slice, Flags); if (buffer->HasSequenceNumber()) GetTypedContext()->TrackBufferSequenceNumber(buffer); } template HRESULT STDMETHODCALLTYPE D3D11CommonContext::CopyTileMappings( ID3D11Resource* pDestTiledResource, const D3D11_TILED_RESOURCE_COORDINATE* pDestRegionCoordinate, ID3D11Resource* pSourceTiledResource, const D3D11_TILED_RESOURCE_COORDINATE* pSourceRegionCoordinate, const D3D11_TILE_REGION_SIZE* pTileRegionSize, UINT Flags) { if (!pDestTiledResource || !pSourceTiledResource) return E_INVALIDARG; if constexpr (!IsDeferred) GetTypedContext()->FlushImplicit(false); DxvkSparseBindInfo bindInfo; bindInfo.dstResource = GetPagedResource(pDestTiledResource); bindInfo.srcResource = GetPagedResource(pSourceTiledResource); auto dstPageTable = bindInfo.dstResource->getSparsePageTable(); auto srcPageTable = bindInfo.srcResource->getSparsePageTable(); if (!dstPageTable || !srcPageTable) return E_INVALIDARG; if (pDestRegionCoordinate->Subresource >= dstPageTable->getSubresourceCount() || pSourceRegionCoordinate->Subresource >= srcPageTable->getSubresourceCount()) return E_INVALIDARG; VkOffset3D dstRegionOffset = { int32_t(pDestRegionCoordinate->X), int32_t(pDestRegionCoordinate->Y), int32_t(pDestRegionCoordinate->Z) }; VkOffset3D srcRegionOffset = { int32_t(pSourceRegionCoordinate->X), int32_t(pSourceRegionCoordinate->Y), int32_t(pSourceRegionCoordinate->Z) }; VkExtent3D regionExtent = { uint32_t(pTileRegionSize->Width), uint32_t(pTileRegionSize->Height), 
uint32_t(pTileRegionSize->Depth) }; for (uint32_t i = 0; i < pTileRegionSize->NumTiles; i++) { // We don't know the current tile mappings of either resource since // this may be called on a deferred context and tile mappings are // updated on the CS thread, so just resolve the copy in the backend uint32_t dstTile = dstPageTable->computePageIndex( pDestRegionCoordinate->Subresource, dstRegionOffset, regionExtent, !pTileRegionSize->bUseBox, i); uint32_t srcTile = srcPageTable->computePageIndex( pSourceRegionCoordinate->Subresource, srcRegionOffset, regionExtent, !pTileRegionSize->bUseBox, i); if (dstTile >= dstPageTable->getPageCount() || srcTile >= srcPageTable->getPageCount()) return E_INVALIDARG; DxvkSparseBind bind; bind.mode = DxvkSparseBindMode::Copy; bind.dstPage = dstTile; bind.srcPage = srcTile; bindInfo.binds.push_back(bind); } DxvkSparseBindFlags flags = (Flags & D3D11_TILE_MAPPING_NO_OVERWRITE) ? DxvkSparseBindFlags(DxvkSparseBindFlag::SkipSynchronization) : DxvkSparseBindFlags(); EmitCs([ cBindInfo = std::move(bindInfo), cFlags = flags ] (DxvkContext* ctx) { ctx->updatePageTable(cBindInfo, cFlags); }); return S_OK; } template HRESULT STDMETHODCALLTYPE D3D11CommonContext::ResizeTilePool( ID3D11Buffer* pTilePool, UINT64 NewSizeInBytes) { if (NewSizeInBytes % SparseMemoryPageSize) return E_INVALIDARG; auto buffer = static_cast(pTilePool); if (!buffer->IsTilePool()) return E_INVALIDARG; // Perform the resize operation. This is somewhat trivialized // since all lifetime tracking is done by the backend. 
EmitCs([ cAllocator = buffer->GetSparseAllocator(), cPageCount = NewSizeInBytes / SparseMemoryPageSize ] (DxvkContext* ctx) { cAllocator->setCapacity(cPageCount); }); return S_OK; } template void STDMETHODCALLTYPE D3D11CommonContext::TiledResourceBarrier( ID3D11DeviceChild* pTiledResourceOrViewAccessBeforeBarrier, ID3D11DeviceChild* pTiledResourceOrViewAccessAfterBarrier) { DxvkGlobalPipelineBarrier srcBarrier = GetTiledResourceDependency(pTiledResourceOrViewAccessBeforeBarrier); DxvkGlobalPipelineBarrier dstBarrier = GetTiledResourceDependency(pTiledResourceOrViewAccessAfterBarrier); if (srcBarrier.stages && dstBarrier.stages) { EmitCs([ cSrcBarrier = srcBarrier, cDstBarrier = dstBarrier ] (DxvkContext* ctx) { ctx->emitGraphicsBarrier( cSrcBarrier.stages, cSrcBarrier.access, cDstBarrier.stages, cDstBarrier.access); }); } } template HRESULT STDMETHODCALLTYPE D3D11CommonContext::UpdateTileMappings( ID3D11Resource* pTiledResource, UINT NumRegions, const D3D11_TILED_RESOURCE_COORDINATE* pRegionCoordinates, const D3D11_TILE_REGION_SIZE* pRegionSizes, ID3D11Buffer* pTilePool, UINT NumRanges, const UINT* pRangeFlags, const UINT* pRangeTileOffsets, const UINT* pRangeTileCounts, UINT Flags) { if (!pTiledResource || !NumRegions || !NumRanges) return E_INVALIDARG; if constexpr (!IsDeferred) GetTypedContext()->FlushImplicit(false); // Find sparse allocator if the tile pool is defined DxvkSparseBindInfo bindInfo; if (pTilePool) { auto tilePool = static_cast(pTilePool); bindInfo.srcAllocator = tilePool->GetSparseAllocator(); if (bindInfo.srcAllocator == nullptr) return E_INVALIDARG; } // Find resource and sparse page table for the given resource bindInfo.dstResource = GetPagedResource(pTiledResource); auto pageTable = bindInfo.dstResource->getSparsePageTable(); if (!pageTable) return E_INVALIDARG; // Lookup table in case the app tries to bind the same // page multiple times. We should resolve that here and // only consider the last bind to any given page. 
std::vector bindIndices(pageTable->getPageCount(), ~0u); // This function allows pretty much every parameter to be nullptr // in some way, so initialize some defaults as necessary D3D11_TILED_RESOURCE_COORDINATE regionCoord = { }; D3D11_TILE_REGION_SIZE regionSize = { }; if (!pRegionSizes) { regionSize.NumTiles = pRegionCoordinates ? 1 : pageTable->getPageCount(); } uint32_t rangeFlag = 0u; uint32_t rangeTileOffset = 0u; uint32_t rangeTileCount = ~0u; // For now, just generate a simple list of tile index to // page index mappings, and let the backend optimize later uint32_t regionIdx = 0u; uint32_t regionTile = 0u; uint32_t rangeIdx = 0u; uint32_t rangeTile = 0u; while (regionIdx < NumRegions && rangeIdx < NumRanges) { if (!regionTile) { if (pRegionCoordinates) regionCoord = pRegionCoordinates[regionIdx]; if (pRegionSizes) regionSize = pRegionSizes[regionIdx]; } if (!rangeTile) { if (pRangeFlags) rangeFlag = pRangeFlags[rangeIdx]; if (pRangeTileOffsets) rangeTileOffset = pRangeTileOffsets[rangeIdx]; if (pRangeTileCounts) rangeTileCount = pRangeTileCounts[rangeIdx]; } if (!(rangeFlag & D3D11_TILE_RANGE_SKIP)) { if (regionCoord.Subresource >= pageTable->getSubresourceCount()) return E_INVALIDARG; if (regionSize.bUseBox && regionSize.NumTiles != regionSize.Width * regionSize.Height * regionSize.Depth) return E_INVALIDARG; VkOffset3D regionOffset = { int32_t(regionCoord.X), int32_t(regionCoord.Y), int32_t(regionCoord.Z) }; VkExtent3D regionExtent = { uint32_t(regionSize.Width), uint32_t(regionSize.Height), uint32_t(regionSize.Depth) }; uint32_t resourceTile = pageTable->computePageIndex(regionCoord.Subresource, regionOffset, regionExtent, !regionSize.bUseBox, regionTile); // Fill in bind info for the current tile DxvkSparseBind bind = { }; bind.dstPage = resourceTile; if (rangeFlag & D3D11_TILE_RANGE_NULL) { bind.mode = DxvkSparseBindMode::Null; } else if (pTilePool) { bind.mode = DxvkSparseBindMode::Bind; bind.srcPage = rangeFlag & D3D11_TILE_RANGE_REUSE_SINGLE_TILE ? 
rangeTileOffset : rangeTileOffset + rangeTile; } else { return E_INVALIDARG; } // Add bind info to the bind list, overriding // any existing bind for the same resource page if (resourceTile < pageTable->getPageCount()) { if (bindIndices[resourceTile] < bindInfo.binds.size()) bindInfo.binds[bindIndices[resourceTile]] = bind; else bindInfo.binds.push_back(bind); } } if (++regionTile == regionSize.NumTiles) { regionTile = 0; regionIdx += 1; } if (++rangeTile == rangeTileCount) { rangeTile = 0; rangeIdx += 1; } } // Translate flags. The backend benefits from NO_OVERWRITE since // otherwise we have to serialize execution of the current command // buffer, the sparse binding operation, and subsequent commands. // With NO_OVERWRITE, we can execute it more or less asynchronously. DxvkSparseBindFlags flags = (Flags & D3D11_TILE_MAPPING_NO_OVERWRITE) ? DxvkSparseBindFlags(DxvkSparseBindFlag::SkipSynchronization) : DxvkSparseBindFlags(); EmitCs([ cBindInfo = std::move(bindInfo), cFlags = flags ] (DxvkContext* ctx) { ctx->updatePageTable(cBindInfo, cFlags); }); return S_OK; } template void STDMETHODCALLTYPE D3D11CommonContext::UpdateTiles( ID3D11Resource* pDestTiledResource, const D3D11_TILED_RESOURCE_COORDINATE* pDestTileRegionStartCoordinate, const D3D11_TILE_REGION_SIZE* pDestTileRegionSize, const void* pSourceTileData, UINT Flags) { if (!pDestTiledResource || !pSourceTileData) return; // Allocate staging memory and copy source data into it, at a // 64k page granularity. It is not clear whether this behaviour // is correct in case we're writing to incmplete pages. VkDeviceSize bufferSize = pDestTileRegionSize->NumTiles * SparseMemoryPageSize; DxvkBufferSlice slice = AllocStagingBuffer(bufferSize); std::memcpy(slice.mapPtr(0), pSourceTileData, bufferSize); // Fix up flags. The runtime probably validates this in some // way but our internal function relies on correct flags anyway. 
Flags &= D3D11_TILE_MAPPING_NO_OVERWRITE; Flags |= D3D11_TILE_COPY_LINEAR_BUFFER_TO_SWIZZLED_TILED_RESOURCE; CopyTiledResourceData(pDestTiledResource, pDestTileRegionStartCoordinate, pDestTileRegionSize, slice, Flags); } template BOOL STDMETHODCALLTYPE D3D11CommonContext::IsAnnotationEnabled() { return m_annotation.GetStatus(); } template void STDMETHODCALLTYPE D3D11CommonContext::SetMarkerInt( LPCWSTR pLabel, INT Data) { // Not implemented in the backend, ignore } template void STDMETHODCALLTYPE D3D11CommonContext::BeginEventInt( LPCWSTR pLabel, INT Data) { // Not implemented in the backend, ignore } template void STDMETHODCALLTYPE D3D11CommonContext::EndEvent() { // Not implemented in the backend, ignore } template void STDMETHODCALLTYPE D3D11CommonContext::GetHardwareProtectionState( BOOL* pHwProtectionEnable) { static bool s_errorShown = false; if (!std::exchange(s_errorShown, true)) Logger::err("D3D11DeviceContext::GetHardwareProtectionState: Not implemented"); if (pHwProtectionEnable) *pHwProtectionEnable = FALSE; } template void STDMETHODCALLTYPE D3D11CommonContext::SetHardwareProtectionState( BOOL HwProtectionEnable) { static bool s_errorShown = false; if (!std::exchange(s_errorShown, true)) Logger::err("D3D11DeviceContext::SetHardwareProtectionState: Not implemented"); } template void STDMETHODCALLTYPE D3D11CommonContext::TransitionSurfaceLayout( IDXGIVkInteropSurface* pSurface, const VkImageSubresourceRange* pSubresources, VkImageLayout OldLayout, VkImageLayout NewLayout) { D3D10DeviceLock lock = LockContext(); // Get the underlying D3D11 resource Com resource; pSurface->QueryInterface(__uuidof(ID3D11Resource), reinterpret_cast(&resource)); // Get the texture from that resource D3D11CommonTexture* texture = GetCommonTexture(resource.ptr()); EmitCs([ cImage = texture->GetImage(), cSubresources = *pSubresources, cOldLayout = OldLayout, cNewLayout = NewLayout ] (DxvkContext* ctx) { ctx->transformImage( cImage, cSubresources, cOldLayout, cNewLayout); }); } 
template DxvkCsChunkRef D3D11CommonContext::AllocCsChunk() { return m_parent->AllocCsChunk(m_csFlags); } template DxvkDataSlice D3D11CommonContext::AllocUpdateBufferSlice(size_t Size) { constexpr size_t UpdateBufferSize = 1 * 1024 * 1024; if (Size >= UpdateBufferSize) { Rc buffer = new DxvkDataBuffer(Size); return buffer->alloc(Size); } else { if (m_updateBuffer == nullptr) m_updateBuffer = new DxvkDataBuffer(UpdateBufferSize); DxvkDataSlice slice = m_updateBuffer->alloc(Size); if (slice.ptr() == nullptr) { m_updateBuffer = new DxvkDataBuffer(UpdateBufferSize); slice = m_updateBuffer->alloc(Size); } return slice; } } template DxvkBufferSlice D3D11CommonContext::AllocStagingBuffer( VkDeviceSize Size) { return m_staging.alloc(256, Size); } template void D3D11CommonContext::ApplyInputLayout() { auto inputLayout = m_state.ia.inputLayout.prvRef(); if (likely(inputLayout != nullptr)) { EmitCs([ cInputLayout = std::move(inputLayout) ] (DxvkContext* ctx) { cInputLayout->BindToContext(ctx); }); } else { EmitCs([] (DxvkContext* ctx) { ctx->setInputLayout(0, nullptr, 0, nullptr); }); } } template void D3D11CommonContext::ApplyPrimitiveTopology() { D3D11_PRIMITIVE_TOPOLOGY topology = m_state.ia.primitiveTopology; DxvkInputAssemblyState iaState = { }; if (topology <= D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ) { static const std::array s_iaStates = {{ { VK_PRIMITIVE_TOPOLOGY_MAX_ENUM, VK_FALSE, 0 }, { VK_PRIMITIVE_TOPOLOGY_POINT_LIST, VK_FALSE, 0 }, { VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_FALSE, 0 }, { VK_PRIMITIVE_TOPOLOGY_LINE_STRIP, VK_TRUE, 0 }, { VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, VK_FALSE, 0 }, { VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, VK_TRUE, 0 }, { }, { }, { }, { }, // Random gap that exists for no reason { VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, VK_FALSE, 0 }, { VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY, VK_TRUE, 0 }, { VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, VK_FALSE, 0 }, { VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY, VK_TRUE, 0 }, }}; 
iaState = s_iaStates[uint32_t(topology)]; } else if (topology >= D3D11_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST && topology <= D3D11_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST) { // The number of control points per patch can be inferred from the enum value in D3D11 uint32_t vertexCount = uint32_t(topology - D3D11_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + 1); iaState = { VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, VK_FALSE, vertexCount }; } EmitCs([iaState] (DxvkContext* ctx) { ctx->setInputAssemblyState(iaState); }); } template void D3D11CommonContext::ApplyBlendState() { if (m_state.om.cbState != nullptr) { EmitCs([ cBlendState = m_state.om.cbState, cSampleMask = m_state.om.sampleMask ] (DxvkContext* ctx) { cBlendState->BindToContext(ctx, cSampleMask); }); } else { EmitCs([ cSampleMask = m_state.om.sampleMask ] (DxvkContext* ctx) { DxvkBlendMode cbState; DxvkLogicOpState loState; DxvkMultisampleState msState; InitDefaultBlendState(&cbState, &loState, &msState, cSampleMask); for (uint32_t i = 0; i < D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT; i++) ctx->setBlendMode(i, cbState); ctx->setLogicOpState(loState); ctx->setMultisampleState(msState); }); } } template void D3D11CommonContext::ApplyBlendFactor() { EmitCs([ cBlendConstants = DxvkBlendConstants { m_state.om.blendFactor[0], m_state.om.blendFactor[1], m_state.om.blendFactor[2], m_state.om.blendFactor[3] } ] (DxvkContext* ctx) { ctx->setBlendConstants(cBlendConstants); }); } template void D3D11CommonContext::ApplyDepthStencilState() { if (m_state.om.dsState != nullptr) { EmitCs([ cDepthStencilState = m_state.om.dsState ] (DxvkContext* ctx) { cDepthStencilState->BindToContext(ctx); }); } else { EmitCs([] (DxvkContext* ctx) { DxvkDepthStencilState dsState; InitDefaultDepthStencilState(&dsState); ctx->setDepthStencilState(dsState); }); } } template void D3D11CommonContext::ApplyStencilRef() { EmitCs([ cStencilRef = m_state.om.stencilRef ] (DxvkContext* ctx) { ctx->setStencilReference(cStencilRef); }); } template void 
D3D11CommonContext::ApplyRasterizerState() { if (m_state.rs.state != nullptr) { EmitCs([ cRasterizerState = m_state.rs.state ] (DxvkContext* ctx) { cRasterizerState->BindToContext(ctx); }); } else { EmitCs([] (DxvkContext* ctx) { DxvkRasterizerState rsState; InitDefaultRasterizerState(&rsState); ctx->setRasterizerState(rsState); }); } } template void D3D11CommonContext::ApplyRasterizerSampleCount() { DxbcPushConstants pc; pc.rasterizerSampleCount = m_state.om.sampleCount; if (unlikely(!m_state.om.sampleCount)) { pc.rasterizerSampleCount = m_state.rs.state ? m_state.rs.state->Desc()->ForcedSampleCount : 0; if (!pc.rasterizerSampleCount) pc.rasterizerSampleCount = 1; } EmitCs([ cPushConstants = pc ] (DxvkContext* ctx) { ctx->pushConstants(0, sizeof(cPushConstants), &cPushConstants); }); } template void D3D11CommonContext::ApplyViewportState() { std::array viewports; std::array scissors; // The backend can't handle a viewport count of zero, // so we should at least specify one empty viewport uint32_t viewportCount = m_state.rs.numViewports; if (unlikely(!viewportCount)) { viewportCount = 1; viewports[0] = VkViewport(); scissors [0] = VkRect2D(); } // D3D11's coordinate system has its origin in the bottom left, // but the viewport coordinates are aligned to the top-left // corner so we can get away with flipping the viewport. for (uint32_t i = 0; i < m_state.rs.numViewports; i++) { const D3D11_VIEWPORT& vp = m_state.rs.viewports[i]; viewports[i] = VkViewport { vp.TopLeftX, vp.Height + vp.TopLeftY, vp.Width, -vp.Height, vp.MinDepth, vp.MaxDepth, }; } // Scissor rectangles. Vulkan does not provide an easy way // to disable the scissor test, so we'll have to set scissor // rects that are at least as large as the framebuffer. 
bool enableScissorTest = false; if (m_state.rs.state != nullptr) { D3D11_RASTERIZER_DESC rsDesc; m_state.rs.state->GetDesc(&rsDesc); enableScissorTest = rsDesc.ScissorEnable; } for (uint32_t i = 0; i < m_state.rs.numViewports; i++) { if (!enableScissorTest) { scissors[i] = VkRect2D { VkOffset2D { 0, 0 }, VkExtent2D { D3D11_VIEWPORT_BOUNDS_MAX, D3D11_VIEWPORT_BOUNDS_MAX } }; } else if (i >= m_state.rs.numScissors) { scissors[i] = VkRect2D { VkOffset2D { 0, 0 }, VkExtent2D { 0, 0 } }; } else { D3D11_RECT sr = m_state.rs.scissors[i]; VkOffset2D srPosA; srPosA.x = std::max(0, sr.left); srPosA.y = std::max(0, sr.top); VkOffset2D srPosB; srPosB.x = std::max(srPosA.x, sr.right); srPosB.y = std::max(srPosA.y, sr.bottom); VkExtent2D srSize; srSize.width = uint32_t(srPosB.x - srPosA.x); srSize.height = uint32_t(srPosB.y - srPosA.y); scissors[i] = VkRect2D { srPosA, srSize }; } } if (likely(viewportCount == 1)) { EmitCs([ cViewport = viewports[0], cScissor = scissors[0] ] (DxvkContext* ctx) { ctx->setViewports(1, &cViewport, &cScissor); }); } else { EmitCs([ cViewportCount = viewportCount, cViewports = viewports, cScissors = scissors ] (DxvkContext* ctx) { ctx->setViewports( cViewportCount, cViewports.data(), cScissors.data()); }); } } template template void D3D11CommonContext::BindShader( const D3D11CommonShader* pShaderModule) { if (pShaderModule) { auto buffer = pShaderModule->GetIcb(); auto shader = pShaderModule->GetShader(); if (unlikely(shader->needsLibraryCompile())) m_device->requestCompileShader(shader); EmitCs([ cBuffer = std::move(buffer), cShader = std::move(shader) ] (DxvkContext* ctx) mutable { constexpr VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); uint32_t slotId = computeConstantBufferBinding(ShaderStage, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT); ctx->bindShader( Forwarder::move(cShader)); ctx->bindResourceBuffer(stage, slotId, Forwarder::move(cBuffer)); }); } else { EmitCs([] (DxvkContext* ctx) { constexpr VkShaderStageFlagBits stage 
= GetShaderStage(ShaderStage); uint32_t slotId = computeConstantBufferBinding(ShaderStage, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT); ctx->bindShader(nullptr); ctx->bindResourceBuffer(stage, slotId, DxvkBufferSlice()); }); } } template void D3D11CommonContext::BindFramebuffer() { DxvkRenderTargets attachments; uint32_t sampleCount = 0; // D3D11 doesn't have the concept of a framebuffer object, // so we'll just create a new one every time the render // target bindings are updated. Set up the attachments. for (UINT i = 0; i < m_state.om.rtvs.size(); i++) { if (m_state.om.rtvs[i] != nullptr) { attachments.color[i] = { m_state.om.rtvs[i]->GetImageView(), m_state.om.rtvs[i]->GetRenderLayout() }; sampleCount = m_state.om.rtvs[i]->GetSampleCount(); } } if (m_state.om.dsv != nullptr) { attachments.depth = { m_state.om.dsv->GetImageView(), m_state.om.dsv->GetRenderLayout() }; sampleCount = m_state.om.dsv->GetSampleCount(); } // Create and bind the framebuffer object to the context EmitCs([ cAttachments = std::move(attachments) ] (DxvkContext* ctx) mutable { ctx->bindRenderTargets(Forwarder::move(cAttachments), 0u); }); // If necessary, update push constant for the sample count if (m_state.om.sampleCount != sampleCount) { m_state.om.sampleCount = sampleCount; ApplyRasterizerSampleCount(); } } template void D3D11CommonContext::BindDrawBuffers( D3D11Buffer* pBufferForArgs, D3D11Buffer* pBufferForCount) { EmitCs([ cArgBuffer = pBufferForArgs ? pBufferForArgs->GetBufferSlice() : DxvkBufferSlice(), cCntBuffer = pBufferForCount ? 
pBufferForCount->GetBufferSlice() : DxvkBufferSlice() ] (DxvkContext* ctx) mutable { ctx->bindDrawBuffers( Forwarder::move(cArgBuffer), Forwarder::move(cCntBuffer)); }); } template void D3D11CommonContext::BindVertexBuffer( UINT Slot, D3D11Buffer* pBuffer, UINT Offset, UINT Stride) { if (pBuffer) { EmitCs([ cSlotId = Slot, cBufferSlice = pBuffer->GetBufferSlice(Offset), cStride = Stride ] (DxvkContext* ctx) mutable { ctx->bindVertexBuffer(cSlotId, Forwarder::move(cBufferSlice), cStride); }); } else { EmitCs([ cSlotId = Slot ] (DxvkContext* ctx) { ctx->bindVertexBuffer(cSlotId, DxvkBufferSlice(), 0); }); } } template void D3D11CommonContext::BindVertexBufferRange( UINT Slot, D3D11Buffer* pBuffer, UINT Offset, UINT Stride) { if (pBuffer) { VkDeviceSize offset = Offset; VkDeviceSize length = pBuffer->GetRemainingSize(Offset); EmitCs([ cSlotId = Slot, cBufferOffset = offset, cBufferLength = length, cStride = Stride ] (DxvkContext* ctx) mutable { ctx->bindVertexBufferRange(cSlotId, cBufferOffset, cBufferLength, cStride); }); } } template void D3D11CommonContext::BindIndexBuffer( D3D11Buffer* pBuffer, UINT Offset, DXGI_FORMAT Format) { VkIndexType indexType = Format == DXGI_FORMAT_R16_UINT ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32; if (pBuffer) { EmitCs([ cBufferSlice = pBuffer->GetBufferSlice(Offset), cIndexType = indexType ] (DxvkContext* ctx) mutable { ctx->bindIndexBuffer( Forwarder::move(cBufferSlice), cIndexType); }); } else { EmitCs([ cIndexType = indexType ] (DxvkContext* ctx) { ctx->bindIndexBuffer(DxvkBufferSlice(), cIndexType); }); } } template void D3D11CommonContext::BindIndexBufferRange( D3D11Buffer* pBuffer, UINT Offset, DXGI_FORMAT Format) { if (pBuffer) { VkIndexType indexType = Format == DXGI_FORMAT_R16_UINT ? 
VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32; VkDeviceSize offset = Offset; VkDeviceSize length = pBuffer->GetRemainingSize(Offset); EmitCs([ cBufferOffset = offset, cBufferLength = length, cIndexType = indexType ] (DxvkContext* ctx) mutable { ctx->bindIndexBufferRange( cBufferOffset, cBufferLength, cIndexType); }); } } template void D3D11CommonContext::BindXfbBuffer( UINT Slot, D3D11Buffer* pBuffer, UINT Offset) { if (pBuffer) { EmitCs([ cSlotId = Slot, cOffset = Offset, cBufferSlice = pBuffer->GetBufferSlice(), cCounterSlice = pBuffer->GetSOCounter() ] (DxvkContext* ctx) mutable { if (cCounterSlice.defined() && cOffset != ~0u) { ctx->updateBuffer( cCounterSlice.buffer(), cCounterSlice.offset(), sizeof(cOffset), &cOffset); } ctx->bindXfbBuffer(cSlotId, Forwarder::move(cBufferSlice), Forwarder::move(cCounterSlice)); }); } else { EmitCs([ cSlotId = Slot ] (DxvkContext* ctx) { ctx->bindXfbBuffer(cSlotId, DxvkBufferSlice(), DxvkBufferSlice()); }); } } template template void D3D11CommonContext::BindConstantBuffer( UINT Slot, D3D11Buffer* pBuffer, UINT Offset, UINT Length) { if (pBuffer) { EmitCs([ cSlotId = Slot, cBufferSlice = pBuffer->GetBufferSlice(16 * Offset, 16 * Length) ] (DxvkContext* ctx) mutable { VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); ctx->bindResourceBuffer(stage, cSlotId, Forwarder::move(cBufferSlice)); }); } else { EmitCs([ cSlotId = Slot ] (DxvkContext* ctx) { VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); ctx->bindResourceBuffer(stage, cSlotId, DxvkBufferSlice()); }); } } template template void D3D11CommonContext::BindConstantBufferRange( UINT Slot, UINT Offset, UINT Length) { EmitCs([ cSlotId = Slot, cOffset = 16 * Offset, cLength = 16 * Length ] (DxvkContext* ctx) { VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); ctx->bindResourceBufferRange(stage, cSlotId, cOffset, cLength); }); } template template void D3D11CommonContext::BindSampler( UINT Slot, D3D11SamplerState* pSampler) { if (pSampler) { EmitCs([ cSlotId 
= Slot, cSampler = pSampler->GetDXVKSampler() ] (DxvkContext* ctx) mutable { VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); ctx->bindResourceSampler(stage, cSlotId, Forwarder::move(cSampler)); }); } else { EmitCs([ cSlotId = Slot ] (DxvkContext* ctx) { VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); ctx->bindResourceSampler(stage, cSlotId, nullptr); }); } } template template void D3D11CommonContext::BindShaderResource( UINT Slot, D3D11ShaderResourceView* pResource) { if (pResource) { if (pResource->GetViewInfo().Dimension != D3D11_RESOURCE_DIMENSION_BUFFER) { EmitCs([ cSlotId = Slot, cView = pResource->GetImageView() ] (DxvkContext* ctx) mutable { VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); ctx->bindResourceImageView(stage, cSlotId, Forwarder::move(cView)); }); } else { EmitCs([ cSlotId = Slot, cView = pResource->GetBufferView() ] (DxvkContext* ctx) mutable { VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); ctx->bindResourceBufferView(stage, cSlotId, Forwarder::move(cView)); }); } } else { EmitCs([ cSlotId = Slot ] (DxvkContext* ctx) { VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); ctx->bindResourceImageView(stage, cSlotId, nullptr); }); } } template template void D3D11CommonContext::BindUnorderedAccessView( UINT UavSlot, D3D11UnorderedAccessView* pUav, UINT CtrSlot, UINT Counter) { if (pUav) { if (pUav->GetViewInfo().Dimension == D3D11_RESOURCE_DIMENSION_BUFFER) { EmitCs([ cUavSlotId = UavSlot, cCtrSlotId = CtrSlot, cBufferView = pUav->GetBufferView(), cCounterSlice = pUav->GetCounterSlice(), cCounterValue = Counter ] (DxvkContext* ctx) mutable { VkShaderStageFlags stages = ShaderStage == DxbcProgramType::ComputeShader ? 
VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS; if (cCounterSlice.defined() && cCounterValue != ~0u) { ctx->updateBuffer( cCounterSlice.buffer(), cCounterSlice.offset(), sizeof(uint32_t), &cCounterValue); } ctx->bindResourceBufferView(stages, cUavSlotId, Forwarder::move(cBufferView)); ctx->bindResourceBuffer(stages, cCtrSlotId, Forwarder::move(cCounterSlice)); }); } else { EmitCs([ cUavSlotId = UavSlot, cCtrSlotId = CtrSlot, cImageView = pUav->GetImageView() ] (DxvkContext* ctx) mutable { VkShaderStageFlags stages = ShaderStage == DxbcProgramType::ComputeShader ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS; ctx->bindResourceImageView(stages, cUavSlotId, Forwarder::move(cImageView)); ctx->bindResourceBuffer(stages, cCtrSlotId, DxvkBufferSlice()); }); } } else { EmitCs([ cUavSlotId = UavSlot, cCtrSlotId = CtrSlot ] (DxvkContext* ctx) { VkShaderStageFlags stages = ShaderStage == DxbcProgramType::ComputeShader ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS; ctx->bindResourceImageView(stages, cUavSlotId, nullptr); ctx->bindResourceBuffer(stages, cCtrSlotId, DxvkBufferSlice()); }); } } template VkClearValue D3D11CommonContext::ConvertColorValue( const FLOAT Color[4], const DxvkFormatInfo* pFormatInfo) { VkClearValue result; if (pFormatInfo->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { for (uint32_t i = 0; i < 4; i++) { if (pFormatInfo->flags.test(DxvkFormatFlag::SampledUInt)) result.color.uint32[i] = uint32_t(std::max(0.0f, Color[i])); else if (pFormatInfo->flags.test(DxvkFormatFlag::SampledSInt)) result.color.int32[i] = int32_t(Color[i]); else result.color.float32[i] = Color[i]; } } else { result.depthStencil.depth = Color[0]; result.depthStencil.stencil = 0; } return result; } template void D3D11CommonContext::CopyBuffer( D3D11Buffer* pDstBuffer, VkDeviceSize DstOffset, D3D11Buffer* pSrcBuffer, VkDeviceSize SrcOffset, VkDeviceSize ByteCount) { // Clamp copy region to prevent out-of-bounds access VkDeviceSize dstLength = 
pDstBuffer->Desc()->ByteWidth; VkDeviceSize srcLength = pSrcBuffer->Desc()->ByteWidth; if (SrcOffset >= srcLength || DstOffset >= dstLength || !ByteCount) return; ByteCount = std::min(dstLength - DstOffset, ByteCount); ByteCount = std::min(srcLength - SrcOffset, ByteCount); EmitCs([ cDstBuffer = pDstBuffer->GetBufferSlice(DstOffset, ByteCount), cSrcBuffer = pSrcBuffer->GetBufferSlice(SrcOffset, ByteCount) ] (DxvkContext* ctx) { if (cDstBuffer.buffer() != cSrcBuffer.buffer()) { ctx->copyBuffer( cDstBuffer.buffer(), cDstBuffer.offset(), cSrcBuffer.buffer(), cSrcBuffer.offset(), cSrcBuffer.length()); } else { ctx->copyBufferRegion( cDstBuffer.buffer(), cDstBuffer.offset(), cSrcBuffer.offset(), cSrcBuffer.length()); } }); if (pDstBuffer->HasSequenceNumber()) GetTypedContext()->TrackBufferSequenceNumber(pDstBuffer); if (pSrcBuffer->HasSequenceNumber()) GetTypedContext()->TrackBufferSequenceNumber(pSrcBuffer); } template void D3D11CommonContext::CopyImage( D3D11CommonTexture* pDstTexture, const VkImageSubresourceLayers* pDstLayers, VkOffset3D DstOffset, D3D11CommonTexture* pSrcTexture, const VkImageSubresourceLayers* pSrcLayers, VkOffset3D SrcOffset, VkExtent3D SrcExtent) { // Image formats must be size-compatible auto dstFormatInfo = lookupFormatInfo(pDstTexture->GetPackedFormat()); auto srcFormatInfo = lookupFormatInfo(pSrcTexture->GetPackedFormat()); if (dstFormatInfo->elementSize != srcFormatInfo->elementSize) return; // Sample counts must match if (pDstTexture->Desc()->SampleDesc.Count != pSrcTexture->Desc()->SampleDesc.Count) return; // Obviously, the copy region must not be empty VkExtent3D dstMipExtent = pDstTexture->MipLevelExtent(pDstLayers->mipLevel); VkExtent3D srcMipExtent = pSrcTexture->MipLevelExtent(pSrcLayers->mipLevel); if (uint32_t(DstOffset.x) >= dstMipExtent.width || uint32_t(DstOffset.y) >= dstMipExtent.height || uint32_t(DstOffset.z) >= dstMipExtent.depth) return; if (uint32_t(SrcOffset.x) >= srcMipExtent.width || uint32_t(SrcOffset.y) >= 
srcMipExtent.height || uint32_t(SrcOffset.z) >= srcMipExtent.depth) return; // Don't perform the copy if the offsets aren't block-aligned if (!util::isBlockAligned(SrcOffset, srcFormatInfo->blockSize) || !util::isBlockAligned(DstOffset, dstFormatInfo->blockSize)) return; // Clamp the image region in order to avoid out-of-bounds access VkExtent3D blockCount = util::computeBlockCount(SrcExtent, srcFormatInfo->blockSize); VkExtent3D dstBlockCount = util::computeMaxBlockCount(DstOffset, dstMipExtent, dstFormatInfo->blockSize); VkExtent3D srcBlockCount = util::computeMaxBlockCount(SrcOffset, srcMipExtent, srcFormatInfo->blockSize); blockCount = util::minExtent3D(blockCount, dstBlockCount); blockCount = util::minExtent3D(blockCount, srcBlockCount); SrcExtent = util::computeBlockExtent(blockCount, srcFormatInfo->blockSize); SrcExtent = util::snapExtent3D(SrcOffset, SrcExtent, srcMipExtent); if (!SrcExtent.width || !SrcExtent.height || !SrcExtent.depth) return; // While copying between 2D and 3D images is allowed in CopySubresourceRegion, // copying more than one slice at a time is not suppoted. Layer counts are 1. if ((pDstTexture->GetVkImageType() == VK_IMAGE_TYPE_3D) != (pSrcTexture->GetVkImageType() == VK_IMAGE_TYPE_3D)) SrcExtent.depth = 1; // Certain types of copies require us to pass the destination extent to // the backend. This may be different when copying between compressed // and uncompressed image formats. VkExtent3D dstExtent = util::computeBlockExtent(blockCount, dstFormatInfo->blockSize); dstExtent = util::snapExtent3D(DstOffset, dstExtent, dstMipExtent); // It is possible for any of the given images to be a staging image with // no actual image, so we need to account for all possibilities here. 
bool dstIsImage = pDstTexture->GetMapMode() != D3D11_COMMON_TEXTURE_MAP_MODE_STAGING; bool srcIsImage = pSrcTexture->GetMapMode() != D3D11_COMMON_TEXTURE_MAP_MODE_STAGING; if (dstIsImage && srcIsImage) { EmitCs([ cDstImage = pDstTexture->GetImage(), cSrcImage = pSrcTexture->GetImage(), cDstLayers = *pDstLayers, cSrcLayers = *pSrcLayers, cDstOffset = DstOffset, cSrcOffset = SrcOffset, cExtent = SrcExtent ] (DxvkContext* ctx) { // CopyResource can only copy between different images, and // CopySubresourceRegion can only copy data from one single // subresource at a time, so this check is safe. if (cDstImage != cSrcImage || cDstLayers != cSrcLayers) { ctx->copyImage( cDstImage, cDstLayers, cDstOffset, cSrcImage, cSrcLayers, cSrcOffset, cExtent); } else { ctx->copyImageRegion( cDstImage, cDstLayers, cDstOffset, cSrcOffset, cExtent); } }); } else { // Since each subresource uses a dedicated buffer, we are going // to need one call per subresource for staging resource copies for (uint32_t i = 0; i < pDstLayers->layerCount; i++) { uint32_t dstSubresource = D3D11CalcSubresource(pDstLayers->mipLevel, pDstLayers->baseArrayLayer + i, pDstTexture->Desc()->MipLevels); uint32_t srcSubresource = D3D11CalcSubresource(pSrcLayers->mipLevel, pSrcLayers->baseArrayLayer + i, pSrcTexture->Desc()->MipLevels); // For multi-plane image data stored in a buffer, the backend // assumes that the second plane immediately follows the first // plane in memory, which is only true if we copy the full image. 
uint32_t planeCount = 1; if (dstFormatInfo->flags.test(DxvkFormatFlag::MultiPlane)) { bool needsSeparateCopies = !dstIsImage && !srcIsImage; if (!dstIsImage) needsSeparateCopies |= pDstTexture->MipLevelExtent(pDstLayers->mipLevel) != SrcExtent; if (!srcIsImage) needsSeparateCopies |= pSrcTexture->MipLevelExtent(pSrcLayers->mipLevel) != SrcExtent; if (needsSeparateCopies) planeCount = vk::getPlaneCount(srcFormatInfo->aspectMask); } for (uint32_t j = 0; j < planeCount; j++) { VkImageAspectFlags dstAspectMask = dstFormatInfo->aspectMask; VkImageAspectFlags srcAspectMask = srcFormatInfo->aspectMask; if (planeCount > 1) { dstAspectMask = vk::getPlaneAspect(j); srcAspectMask = dstAspectMask; } if (dstIsImage) { VkImageSubresourceLayers dstLayer = { dstAspectMask, pDstLayers->mipLevel, pDstLayers->baseArrayLayer + i, 1 }; EmitCs([ cDstImage = pDstTexture->GetImage(), cDstLayers = dstLayer, cDstOffset = DstOffset, cDstExtent = dstExtent, cSrcBuffer = pSrcTexture->GetMappedBuffer(srcSubresource), cSrcLayout = pSrcTexture->GetSubresourceLayout(srcAspectMask, srcSubresource), cSrcOffset = pSrcTexture->ComputeMappedOffset(srcSubresource, j, SrcOffset), cSrcCoord = SrcOffset, cSrcExtent = srcMipExtent, cSrcFormat = pSrcTexture->GetPackedFormat() ] (DxvkContext* ctx) { if (cDstLayers.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { ctx->copyBufferToImage(cDstImage, cDstLayers, cDstOffset, cDstExtent, cSrcBuffer, cSrcOffset, cSrcLayout.RowPitch, cSrcLayout.DepthPitch); } else { ctx->copyPackedBufferToDepthStencilImage(cDstImage, cDstLayers, VkOffset2D { cDstOffset.x, cDstOffset.y }, VkExtent2D { cDstExtent.width, cDstExtent.height }, cSrcBuffer, cSrcLayout.Offset, VkOffset2D { cSrcCoord.x, cSrcCoord.y }, VkExtent2D { cSrcExtent.width, cSrcExtent.height }, cSrcFormat); } }); } else if (srcIsImage) { VkImageSubresourceLayers srcLayer = { srcAspectMask, pSrcLayers->mipLevel, pSrcLayers->baseArrayLayer + i, 1 }; EmitCs([ cSrcImage = pSrcTexture->GetImage(), 
cSrcLayers = srcLayer, cSrcOffset = SrcOffset, cSrcExtent = SrcExtent, cDstBuffer = pDstTexture->GetMappedBuffer(dstSubresource), cDstLayout = pDstTexture->GetSubresourceLayout(dstAspectMask, dstSubresource), cDstOffset = pDstTexture->ComputeMappedOffset(dstSubresource, j, DstOffset), cDstCoord = DstOffset, cDstExtent = dstMipExtent, cDstFormat = pDstTexture->GetPackedFormat() ] (DxvkContext* ctx) { if (cSrcLayers.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { ctx->copyImageToBuffer(cDstBuffer, cDstOffset, cDstLayout.RowPitch, cDstLayout.DepthPitch, cSrcImage, cSrcLayers, cSrcOffset, cSrcExtent); } else { ctx->copyDepthStencilImageToPackedBuffer(cDstBuffer, cDstLayout.Offset, VkOffset2D { cDstCoord.x, cDstCoord.y }, VkExtent2D { cDstExtent.width, cDstExtent.height }, cSrcImage, cSrcLayers, VkOffset2D { cSrcOffset.x, cSrcOffset.y }, VkExtent2D { cSrcExtent.width, cSrcExtent.height }, cDstFormat); } }); } else { // The backend is not aware of image metadata in this case, // so we need to handle image planes and block sizes here VkDeviceSize elementSize = dstFormatInfo->elementSize; VkExtent3D dstBlockSize = dstFormatInfo->blockSize; VkExtent3D srcBlockSize = srcFormatInfo->blockSize; VkExtent3D planeBlockSize = { 1u, 1u, 1u }; if (planeCount > 1) { auto plane = &dstFormatInfo->planes[j]; dstBlockSize.width *= plane->blockSize.width; dstBlockSize.height *= plane->blockSize.height; srcBlockSize.width *= plane->blockSize.width; srcBlockSize.height *= plane->blockSize.height; planeBlockSize.width = plane->blockSize.width; planeBlockSize.height = plane->blockSize.height; elementSize = plane->elementSize; } EmitCs([ cPixelSize = elementSize, cSrcBuffer = pSrcTexture->GetMappedBuffer(srcSubresource), cSrcStart = pSrcTexture->GetSubresourceLayout(srcAspectMask, srcSubresource).Offset, cSrcOffset = util::computeBlockOffset(SrcOffset, srcBlockSize), cSrcSize = util::computeBlockCount(srcMipExtent, srcBlockSize), cDstBuffer = 
pDstTexture->GetMappedBuffer(dstSubresource), cDstStart = pDstTexture->GetSubresourceLayout(dstAspectMask, dstSubresource).Offset, cDstOffset = util::computeBlockOffset(DstOffset, dstBlockSize), cDstSize = util::computeBlockCount(dstMipExtent, dstBlockSize), cExtent = util::computeBlockCount(blockCount, planeBlockSize) ] (DxvkContext* ctx) { ctx->copyPackedBufferImage( cDstBuffer, cDstStart, cDstOffset, cDstSize, cSrcBuffer, cSrcStart, cSrcOffset, cSrcSize, cExtent, cPixelSize); }); } } } } if (pDstTexture->HasSequenceNumber()) { for (uint32_t i = 0; i < pDstLayers->layerCount; i++) { GetTypedContext()->TrackTextureSequenceNumber(pDstTexture, D3D11CalcSubresource( pDstLayers->mipLevel, pDstLayers->baseArrayLayer + i, pDstTexture->Desc()->MipLevels)); } } if (pSrcTexture->HasSequenceNumber()) { for (uint32_t i = 0; i < pSrcLayers->layerCount; i++) { GetTypedContext()->TrackTextureSequenceNumber(pSrcTexture, D3D11CalcSubresource( pSrcLayers->mipLevel, pSrcLayers->baseArrayLayer + i, pSrcTexture->Desc()->MipLevels)); } } } template void D3D11CommonContext::CopyTiledResourceData( ID3D11Resource* pResource, const D3D11_TILED_RESOURCE_COORDINATE* pRegionCoordinate, const D3D11_TILE_REGION_SIZE* pRegionSize, DxvkBufferSlice BufferSlice, UINT Flags) { Rc resource = GetPagedResource(pResource); // Do some validation based on page table properties auto pageTable = resource->getSparsePageTable(); if (!pageTable) return; if (pRegionSize->bUseBox && pRegionSize->NumTiles != pRegionSize->Width * pRegionSize->Height * pRegionSize->Depth) return; if (pRegionSize->NumTiles > pageTable->getPageCount()) return; // Ignore call if buffer access would be out of bounds VkDeviceSize bufferSize = pRegionSize->NumTiles * SparseMemoryPageSize; if (BufferSlice.length() < bufferSize) return; // Compute list of tile indices to copy std::vector tiles(pRegionSize->NumTiles); for (uint32_t i = 0; i < pRegionSize->NumTiles; i++) { VkOffset3D regionOffset = { int32_t(pRegionCoordinate->X), 
int32_t(pRegionCoordinate->Y),
        int32_t(pRegionCoordinate->Z) };

      VkExtent3D regionExtent = {
        uint32_t(pRegionSize->Width),
        uint32_t(pRegionSize->Height),
        uint32_t(pRegionSize->Depth) };

      uint32_t tile = pageTable->computePageIndex(
        pRegionCoordinate->Subresource, regionOffset,
        regionExtent, !pRegionSize->bUseBox, i);

      // Check that the tile is valid and not part of the mip tail
      auto tileInfo = pageTable->getPageInfo(tile);

      // Abort the whole copy (nothing has been emitted yet) if any
      // tile is neither a buffer page nor an image page.
      if (tileInfo.type != DxvkSparsePageType::Buffer
       && tileInfo.type != DxvkSparsePageType::Image)
        return;

      tiles[i] = tile;
    }

    // If D3D12 is anything to go by, not passing this flag will trigger
    // the other code path, regardless of whether TO_LINEAR_BUFFER is set.
    if (Flags & D3D11_TILE_COPY_LINEAR_BUFFER_TO_SWIZZLED_TILED_RESOURCE) {
      // Buffer -> sparse pages. The resource, tile list and buffer slice
      // are moved into the closure, so they are not used again below.
      EmitCs([
        cResource = std::move(resource),
        cTiles    = std::move(tiles),
        cBuffer   = std::move(BufferSlice)
      ] (DxvkContext* ctx) {
        ctx->copySparsePagesFromBuffer(
          cResource,
          cTiles.size(),
          cTiles.data(),
          cBuffer.buffer(),
          cBuffer.offset());
      });
    } else {
      // Sparse pages -> buffer
      EmitCs([
        cResource = std::move(resource),
        cTiles    = std::move(tiles),
        cBuffer   = std::move(BufferSlice)
      ] (DxvkContext* ctx) {
        ctx->copySparsePagesToBuffer(
          cBuffer.buffer(),
          cBuffer.offset(),
          cResource,
          cTiles.size(),
          cTiles.data());
      });
    }
  }


  // Discards the contents of a buffer by mapping it with
  // D3D11_MAP_WRITE_DISCARD and unmapping it right away.
  // Buffers whose map mode is D3D11_COMMON_BUFFER_MAP_MODE_NONE
  // are left untouched. The result of Map is not checked.
  //
  // pResource: the buffer to discard; it is cast without a type
  //            check, so the caller must pass a buffer resource.
  template void D3D11CommonContext::DiscardBuffer(
          ID3D11Resource*                   pResource) {
    auto buffer = static_cast(pResource);

    if (buffer->GetMapMode() != D3D11_COMMON_BUFFER_MAP_MODE_NONE) {
      D3D11_MAPPED_SUBRESOURCE sr;

      Map(pResource, 0, D3D11_MAP_WRITE_DISCARD, 0, &sr);
      Unmap(pResource, 0);
    }
  }


  // Discards a single texture subresource by mapping it with
  // D3D11_MAP_WRITE_DISCARD and unmapping it right away.
  // Textures whose map mode is D3D11_COMMON_TEXTURE_MAP_MODE_NONE
  // are left untouched. The result of Map is not checked.
  //
  // pResource:   the texture resource
  // Subresource: index of the subresource to discard
  template void D3D11CommonContext::DiscardTexture(
          ID3D11Resource*                   pResource,
          UINT                              Subresource) {
    auto texture = GetCommonTexture(pResource);

    if (texture->GetMapMode() != D3D11_COMMON_TEXTURE_MAP_MODE_NONE) {
      D3D11_MAPPED_SUBRESOURCE sr;

      Map(pResource, Subresource, D3D11_MAP_WRITE_DISCARD, 0, &sr);
      Unmap(pResource, Subresource);
    }
  }


  // Reads back the constant buffer bindings tracked in m_state.cbv
  // for the given shader stage.
  //
  // StartSlot:         first slot to query
  // NumBuffers:        number of consecutive slots to query
  // ppConstantBuffers: optional; receives the bound buffer per slot
  // pFirstConstant:    optional; receives the constant offset per slot
  // pNumConstants:     optional; receives the constant count per slot
  //
  // Each output array is skipped when it is null. Slots outside of
  // the binding array report nullptr / 0 instead of being read.
  template template void D3D11CommonContext::GetConstantBuffers(
          UINT                              StartSlot,
          UINT                              NumBuffers,
          ID3D11Buffer**                    ppConstantBuffers,
          UINT*                             pFirstConstant,
          UINT*                             pNumConstants) {
    const auto& bindings = m_state.cbv[ShaderStage];

    for (uint32_t i = 0; i < NumBuffers; i++) {
      const bool inRange = StartSlot + i < bindings.buffers.size();

      if (ppConstantBuffers) {
        // NOTE(review): ref() presumably hands out a new reference
        // that the caller has to release - confirm against Com.
        ppConstantBuffers[i] = inRange
          ? bindings.buffers[StartSlot + i].buffer.ref()
          : nullptr;
      }

      if (pFirstConstant) {
        pFirstConstant[i] = inRange
          ? bindings.buffers[StartSlot + i].constantOffset
          : 0u;
      }

      if (pNumConstants) {
        pNumConstants[i] = inRange
          ? bindings.buffers[StartSlot + i].constantCount
          : 0u;
      }
    }
  }


  // Reads back the shader resource views tracked in m_state.srv
  // for the given shader stage.
  //
  // StartSlot:             first slot to query
  // NumViews:              number of consecutive slots to query
  // ppShaderResourceViews: receives one view per slot, or nullptr for
  //                        slots outside of the binding array. Unlike
  //                        GetConstantBuffers, this array is written
  //                        unconditionally and must not be null when
  //                        NumViews is non-zero.
  template template void D3D11CommonContext::GetShaderResources(
          UINT                              StartSlot,
          UINT                              NumViews,
          ID3D11ShaderResourceView**        ppShaderResourceViews) {
    const auto& bindings = m_state.srv[ShaderStage];

    for (uint32_t i = 0; i < NumViews; i++) {
      ppShaderResourceViews[i] = StartSlot + i < bindings.views.size()
        ? bindings.views[StartSlot + i].ref()
        : nullptr;
    }
  }


  template template void D3D11CommonContext::GetSamplers(
          UINT                              StartSlot,
          UINT                              NumSamplers,
          ID3D11SamplerState**              ppSamplers) {
    const auto& bindings = m_state.samplers[ShaderStage];

    for (uint32_t i = 0; i < NumSamplers; i++) {
      ppSamplers[i] = StartSlot + i < bindings.samplers.size() ?
ref(bindings.samplers[StartSlot + i]) : nullptr; } } template DxvkGlobalPipelineBarrier D3D11CommonContext::GetTiledResourceDependency( ID3D11DeviceChild* pObject) { if (!pObject) { DxvkGlobalPipelineBarrier result; result.stages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; result.access = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT; return result; } else { Com resource; if (FAILED(pObject->QueryInterface(IID_PPV_ARGS(&resource)))) { Com view; if (FAILED(pObject->QueryInterface(IID_PPV_ARGS(&view)))) return DxvkGlobalPipelineBarrier(); view->GetResource(&resource); } D3D11CommonTexture* texture = GetCommonTexture(resource.ptr()); if (texture) { Rc image = texture->GetImage(); DxvkGlobalPipelineBarrier result; result.stages = image->info().stages; result.access = image->info().access; return result; } else { Rc buffer = static_cast(resource.ptr())->GetBuffer(); if (buffer == nullptr) return DxvkGlobalPipelineBarrier(); DxvkGlobalPipelineBarrier result; result.stages = buffer->info().stages; result.access = buffer->info().access; return result; } } } template D3D11MaxUsedBindings D3D11CommonContext::GetMaxUsedBindings() { D3D11MaxUsedBindings result; for (uint32_t i = 0; i < result.stages.size(); i++) { auto stage = DxbcProgramType(i); result.stages[i].cbvCount = m_state.cbv[stage].maxCount; result.stages[i].srvCount = m_state.srv[stage].maxCount; result.stages[i].uavCount = 0; result.stages[i].samplerCount = m_state.samplers[stage].maxCount; result.stages[i].reserved = 0; } result.stages[uint32_t(DxbcProgramType::PixelShader)].uavCount = m_state.om.maxUav; result.stages[uint32_t(DxbcProgramType::ComputeShader)].uavCount = m_state.uav.maxCount; result.vbCount = m_state.ia.maxVbCount; result.soCount = D3D11_SO_BUFFER_SLOT_COUNT; return result; } template void D3D11CommonContext::ResetCommandListState() { EmitCs([ cUsedBindings = GetMaxUsedBindings() ] (DxvkContext* ctx) { // Reset render targets ctx->bindRenderTargets(DxvkRenderTargets(), 0u); // Reset vertex 
input state ctx->setInputLayout(0, nullptr, 0, nullptr); // Reset render states DxvkInputAssemblyState iaState; InitDefaultPrimitiveTopology(&iaState); DxvkDepthStencilState dsState; InitDefaultDepthStencilState(&dsState); DxvkRasterizerState rsState; InitDefaultRasterizerState(&rsState); DxvkBlendMode cbState; DxvkLogicOpState loState; DxvkMultisampleState msState; InitDefaultBlendState(&cbState, &loState, &msState, D3D11_DEFAULT_SAMPLE_MASK); ctx->setInputAssemblyState(iaState); ctx->setDepthStencilState(dsState); ctx->setRasterizerState(rsState); ctx->setLogicOpState(loState); ctx->setMultisampleState(msState); for (uint32_t i = 0; i < D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT; i++) ctx->setBlendMode(i, cbState); // Reset dynamic states ctx->setBlendConstants(DxvkBlendConstants { 1.0f, 1.0f, 1.0f, 1.0f }); ctx->setStencilReference(D3D11_DEFAULT_STENCIL_REFERENCE); // Reset viewports auto viewport = VkViewport(); auto scissor = VkRect2D(); ctx->setViewports(1, &viewport, &scissor); // Unbind indirect draw buffer ctx->bindDrawBuffers(DxvkBufferSlice(), DxvkBufferSlice()); // Unbind index and vertex buffers ctx->bindIndexBuffer(DxvkBufferSlice(), VK_INDEX_TYPE_UINT32); for (uint32_t i = 0; i < cUsedBindings.vbCount; i++) ctx->bindVertexBuffer(i, DxvkBufferSlice(), 0); // Unbind transform feedback buffers for (uint32_t i = 0; i < cUsedBindings.soCount; i++) ctx->bindXfbBuffer(i, DxvkBufferSlice(), DxvkBufferSlice()); // Unbind all shaders ctx->bindShader(nullptr); ctx->bindShader(nullptr); ctx->bindShader(nullptr); ctx->bindShader(nullptr); ctx->bindShader(nullptr); ctx->bindShader(nullptr); // Unbind per-shader stage resources for (uint32_t i = 0; i < 6; i++) { auto programType = DxbcProgramType(i); auto stage = GetShaderStage(programType); // Unbind constant buffers, including the shader's ICB auto cbSlotId = computeConstantBufferBinding(programType, 0); ctx->bindResourceBuffer(stage, cbSlotId + D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, DxvkBufferSlice()); 
for (uint32_t j = 0; j < cUsedBindings.stages[i].cbvCount; j++) ctx->bindResourceBuffer(stage, cbSlotId + j, DxvkBufferSlice()); // Unbind shader resource views auto srvSlotId = computeSrvBinding(programType, 0); for (uint32_t j = 0; j < cUsedBindings.stages[i].srvCount; j++) ctx->bindResourceImageView(stage, srvSlotId + j, nullptr); // Unbind texture samplers auto samplerSlotId = computeSamplerBinding(programType, 0); for (uint32_t j = 0; j < cUsedBindings.stages[i].samplerCount; j++) ctx->bindResourceSampler(stage, samplerSlotId + j, nullptr); // Unbind UAVs for supported stages if (programType == DxbcProgramType::PixelShader || programType == DxbcProgramType::ComputeShader) { VkShaderStageFlags stages = programType == DxbcProgramType::PixelShader ? VK_SHADER_STAGE_ALL_GRAPHICS : VK_SHADER_STAGE_COMPUTE_BIT; auto uavSlotId = computeUavBinding(programType, 0); auto ctrSlotId = computeUavCounterBinding(programType, 0); for (uint32_t j = 0; j < cUsedBindings.stages[i].uavCount; j++) { ctx->bindResourceImageView(stages, uavSlotId, nullptr); ctx->bindResourceBuffer(stages, ctrSlotId, DxvkBufferSlice()); } } } // Initialize push constants DxbcPushConstants pc; pc.rasterizerSampleCount = 1; ctx->pushConstants(0, sizeof(pc), &pc); }); } template void D3D11CommonContext::ResetContextState() { // Reset shaders m_state.vs = nullptr; m_state.hs = nullptr; m_state.ds = nullptr; m_state.gs = nullptr; m_state.ps = nullptr; m_state.cs = nullptr; // Reset render state m_state.id.reset(); m_state.ia.reset(); m_state.om.reset(); m_state.rs.reset(); m_state.so.reset(); m_state.pr.reset(); // Reset resource bindings m_state.cbv.reset(); m_state.srv.reset(); m_state.uav.reset(); m_state.samplers.reset(); } template void D3D11CommonContext::ResetStagingBuffer() { m_staging.reset(); } template template void D3D11CommonContext::ResolveSrvHazards( T* pView) { auto& bindings = m_state.srv[ShaderStage]; uint32_t slotId = computeSrvBinding(ShaderStage, 0); int32_t srvId = 
bindings.hazardous.findNext(0);

    // Only visit slots that are flagged as potentially hazardous
    while (srvId >= 0) {
      auto srv = bindings.views[srvId].ptr();

      if (likely(srv && srv->TestHazards())) {
        bool hazard = CheckViewOverlap(pView, srv);

        if (unlikely(hazard)) {
          // The bound SRV overlaps the new view: unbind it both in
          // the tracked state and in the backend.
          bindings.views[srvId] = nullptr;
          bindings.hazardous.clr(srvId);

          BindShaderResource(slotId + srvId, nullptr);
        }
      } else {
        // Avoid further redundant iterations
        bindings.hazardous.clr(srvId);
      }

      srvId = bindings.hazardous.findNext(srvId + 1);
    }
  }


  // Unbinds shader resource views that overlap the given view.
  // Does nothing if pView is null.
  template template void D3D11CommonContext::ResolveCsSrvHazards(
          T*                                pView) {
    if (!pView) return;
    ResolveSrvHazards(pView);
  }


  // Unbinds shader resource views that overlap the given output
  // merger view; ResolveSrvHazards is invoked five times here.
  // Does nothing if pView is null.
  template template void D3D11CommonContext::ResolveOmSrvHazards(
          T*                                pView) {
    if (!pView) return;
    ResolveSrvHazards(pView);
    ResolveSrvHazards(pView);
    ResolveSrvHazards(pView);
    ResolveSrvHazards(pView);
    ResolveSrvHazards(pView);
  }


  // Unbinds the depth-stencil view and any render target view that
  // overlaps the given UAV in the tracked output merger state.
  //
  // pView: the UAV being bound; ignored if null or if it does not
  //        have the D3D11_BIND_RENDER_TARGET bind flag.
  //
  // Returns true if at least one view was unbound. Only m_state is
  // changed here; the caller is expected to rebind the framebuffer.
  template bool D3D11CommonContext::ResolveOmRtvHazards(
          D3D11UnorderedAccessView*         pView) {
    if (!pView || !pView->HasBindFlag(D3D11_BIND_RENDER_TARGET))
      return false;

    bool hazard = false;

    if (CheckViewOverlap(pView, m_state.om.dsv.ptr())) {
      m_state.om.dsv = nullptr;
      hazard = true;
    }

    for (uint32_t i = 0; i < m_state.om.maxRtv; i++) {
      if (CheckViewOverlap(pView, m_state.om.rtvs[i].ptr())) {
        m_state.om.rtvs[i] = nullptr;
        hazard = true;
      }
    }

    return hazard;
  }


  // Unbinds any output merger UAV that overlaps the given render
  // target view, both in m_state and in the backend.
  //
  // pView: the RTV being bound; ignored if null or if it does not
  //        have the D3D11_BIND_UNORDERED_ACCESS bind flag.
  template void D3D11CommonContext::ResolveOmUavHazards(
          D3D11RenderTargetView*            pView) {
    if (!pView || !pView->HasBindFlag(D3D11_BIND_UNORDERED_ACCESS))
      return;

    uint32_t uavSlotId = computeUavBinding (DxbcProgramType::PixelShader, 0);
    uint32_t ctrSlotId = computeUavCounterBinding(DxbcProgramType::PixelShader, 0);

    for (uint32_t i = 0; i < m_state.om.maxUav; i++) {
      if (CheckViewOverlap(pView, m_state.om.uavs[i].ptr())) {
        m_state.om.uavs[i] = nullptr;

        // ~0u is passed as the counter value when unbinding
        BindUnorderedAccessView(
          uavSlotId + i, nullptr,
          ctrSlotId + i, ~0u);
      }
    }
  }


  // Re-applies the complete state tracked in m_state to the backend:
  // framebuffer, the six shader objects, fixed-function state, draw,
  // index, vertex and stream output buffers, followed by constant
  // buffers, shader resources, UAVs and samplers.
  template void D3D11CommonContext::RestoreCommandListState() {
    BindFramebuffer();

    BindShader(GetCommonShader(m_state.vs.ptr()));
    BindShader(GetCommonShader(m_state.hs.ptr()));
    BindShader(GetCommonShader(m_state.ds.ptr()));
    BindShader(GetCommonShader(m_state.gs.ptr()));
    BindShader(GetCommonShader(m_state.ps.ptr()));
    BindShader(GetCommonShader(m_state.cs.ptr()));

    ApplyInputLayout();
    ApplyPrimitiveTopology();
    ApplyBlendState();
    ApplyBlendFactor();
    ApplyDepthStencilState();
    ApplyStencilRef();
    ApplyRasterizerState();
    ApplyRasterizerSampleCount();
    ApplyViewportState();

    BindDrawBuffers(
      m_state.id.argBuffer.ptr(),
      m_state.id.cntBuffer.ptr());

    BindIndexBuffer(
      m_state.ia.indexBuffer.buffer.ptr(),
      m_state.ia.indexBuffer.offset,
      m_state.ia.indexBuffer.format);

    // Only vertex buffer slots below maxVbCount are rebound
    for (uint32_t i = 0; i < m_state.ia.maxVbCount; i++) {
      BindVertexBuffer(i,
        m_state.ia.vertexBuffers[i].buffer.ptr(),
        m_state.ia.vertexBuffers[i].offset,
        m_state.ia.vertexBuffers[i].stride);
    }

    // Stream output targets are rebound with an offset of ~0u
    for (uint32_t i = 0; i < m_state.so.targets.size(); i++)
      BindXfbBuffer(i, m_state.so.targets[i].buffer.ptr(), ~0u);

    RestoreConstantBuffers();
    RestoreConstantBuffers();
    RestoreConstantBuffers();
    RestoreConstantBuffers();
    RestoreConstantBuffers();
    RestoreConstantBuffers();

    RestoreShaderResources();
    RestoreShaderResources();
    RestoreShaderResources();
    RestoreShaderResources();
    RestoreShaderResources();
    RestoreShaderResources();

    RestoreUnorderedAccessViews();
    RestoreUnorderedAccessViews();

    RestoreSamplers();
    RestoreSamplers();
    RestoreSamplers();
    RestoreSamplers();
    RestoreSamplers();
    RestoreSamplers();
  }


  // Rebinds the tracked constant buffers of one shader stage, using
  // the stored constant offset and the stored bound constant count.
  // Only slots below the stage's maxCount are visited.
  template template void D3D11CommonContext::RestoreConstantBuffers() {
    const auto& bindings = m_state.cbv[Stage];
    uint32_t slotId = computeConstantBufferBinding(Stage, 0);

    for (uint32_t i = 0; i < bindings.maxCount; i++) {
      BindConstantBuffer(slotId + i, bindings.buffers[i].buffer.ptr(),
        bindings.buffers[i].constantOffset, bindings.buffers[i].constantBound);
    }
  }


  // Rebinds the tracked samplers of one shader stage.
  // Only slots below the stage's maxCount are visited.
  template template void D3D11CommonContext::RestoreSamplers() {
    const auto& bindings = m_state.samplers[Stage];
    uint32_t slotId = computeSamplerBinding(Stage, 0);

    for (uint32_t i = 0; i < bindings.maxCount; i++)
      BindSampler(slotId + i, bindings.samplers[i]);
  }


  // Rebinds the tracked shader resource views of one shader stage.
  // Only slots below the stage's maxCount are visited.
  template template void D3D11CommonContext::RestoreShaderResources() {
    const auto& bindings = m_state.srv[Stage];
    uint32_t slotId = computeSrvBinding(Stage, 0);

    for (uint32_t i = 0; i < bindings.maxCount; i++)
      BindShaderResource(slotId + i, bindings.views[i].ptr());
  }


  // Rebinds the tracked UAVs of one stage. For the compute stage the
  // views and count come from m_state.uav, for any other stage from
  // the output merger state (m_state.om). Counters are bound with a
  // value of ~0u.
  template template void D3D11CommonContext::RestoreUnorderedAccessViews() {
    const auto& views = Stage == DxbcProgramType::ComputeShader
      ? m_state.uav.views
      : m_state.om.uavs;

    uint32_t maxCount = Stage == DxbcProgramType::ComputeShader
      ? m_state.uav.maxCount
      : m_state.om.maxUav;

    uint32_t uavSlotId = computeUavBinding(Stage, 0);
    uint32_t ctrSlotId = computeUavCounterBinding(Stage, 0);

    for (uint32_t i = 0; i < maxCount; i++) {
      BindUnorderedAccessView(
        uavSlotId + i, views[i].ptr(),
        ctrSlotId + i, ~0u);
    }
  }


  template template void D3D11CommonContext::SetConstantBuffers(
          UINT                              StartSlot,
          UINT                              NumBuffers,
          ID3D11Buffer* const*              ppConstantBuffers) {
    auto& bindings = m_state.cbv[ShaderStage];

    uint32_t slotId = computeConstantBufferBinding(ShaderStage, StartSlot);

    for (uint32_t i = 0; i < NumBuffers; i++) {
      auto newBuffer = static_cast(ppConstantBuffers[i]);

      uint32_t constantCount = newBuffer ?
std::min(newBuffer->Desc()->ByteWidth / 16, UINT(D3D11_REQ_CONSTANT_BUFFER_ELEMENT_COUNT))
        : 0u;

      // Skip redundant binds: the slot is only updated if the buffer
      // changed or if it was previously bound with a sub-range.
      if (bindings.buffers[StartSlot + i].buffer != newBuffer
       || bindings.buffers[StartSlot + i].constantOffset != 0
       || bindings.buffers[StartSlot + i].constantCount != constantCount) {
        bindings.buffers[StartSlot + i].buffer = newBuffer;
        bindings.buffers[StartSlot + i].constantOffset = 0;
        bindings.buffers[StartSlot + i].constantCount = constantCount;
        bindings.buffers[StartSlot + i].constantBound = constantCount;

        BindConstantBuffer(slotId + i, newBuffer, 0, constantCount);
      }
    }

    // maxCount only ever grows, and never beyond the array size
    bindings.maxCount = std::clamp(StartSlot + NumBuffers,
      bindings.maxCount, uint32_t(bindings.buffers.size()));
  }


  // Binds constant buffers with an optional constant range per slot.
  //
  // StartSlot:         first slot to bind to
  // NumBuffers:        number of consecutive slots to bind
  // ppConstantBuffers: one buffer per slot; entries may be null
  // pFirstConstant:    optional; first constant of the range per slot
  // pNumConstants:     optional; number of constants per slot
  //
  // Constants are 16 bytes each. If either range array is null, the
  // whole buffer is bound, limited to
  // D3D11_REQ_CONSTANT_BUFFER_ELEMENT_COUNT constants. The slot
  // indices are not range-checked here.
  template template void D3D11CommonContext::SetConstantBuffers1(
          UINT                              StartSlot,
          UINT                              NumBuffers,
          ID3D11Buffer* const*              ppConstantBuffers,
    const UINT*                             pFirstConstant,
    const UINT*                             pNumConstants) {
    auto& bindings = m_state.cbv[ShaderStage];

    uint32_t slotId = computeConstantBufferBinding(ShaderStage, StartSlot);

    for (uint32_t i = 0; i < NumBuffers; i++) {
      auto newBuffer = static_cast(ppConstantBuffers[i]);

      UINT constantOffset;
      UINT constantCount;
      UINT constantBound;

      if (likely(newBuffer != nullptr)) {
        UINT bufferConstantsCount = newBuffer->Desc()->ByteWidth / 16;
        constantBound = std::min(bufferConstantsCount, UINT(D3D11_REQ_CONSTANT_BUFFER_ELEMENT_COUNT));

        if (likely(pFirstConstant && pNumConstants)) {
          constantOffset = pFirstConstant[i];
          constantCount = pNumConstants [i];

          // A count above the limit leaves this slot's binding unchanged
          if (unlikely(constantCount > D3D11_REQ_CONSTANT_BUFFER_ELEMENT_COUNT))
            continue;

          // Shrink the bound range if it extends past the end of the
          // buffer; this yields zero if the offset itself is past it.
          constantBound = (constantOffset + constantCount > bufferConstantsCount)
            ? bufferConstantsCount - std::min(constantOffset, bufferConstantsCount)
            : constantCount;
        } else {
          constantOffset = 0;
          constantCount = constantBound;
        }
      } else {
        constantOffset = 0;
        constantCount = 0;
        constantBound = 0;
      }

      // Do a full rebind if the buffer changes. If only the
      // constant range changes, update just the bound range.
      if (bindings.buffers[StartSlot + i].buffer != newBuffer) {
        bindings.buffers[StartSlot + i].buffer = newBuffer;
        bindings.buffers[StartSlot + i].constantOffset = constantOffset;
        bindings.buffers[StartSlot + i].constantCount = constantCount;
        bindings.buffers[StartSlot + i].constantBound = constantBound;

        BindConstantBuffer(slotId + i, newBuffer, constantOffset, constantBound);
      } else if (bindings.buffers[StartSlot + i].constantOffset != constantOffset
              || bindings.buffers[StartSlot + i].constantCount != constantCount) {
        bindings.buffers[StartSlot + i].constantOffset = constantOffset;
        bindings.buffers[StartSlot + i].constantCount = constantCount;
        bindings.buffers[StartSlot + i].constantBound = constantBound;

        BindConstantBufferRange(slotId + i, constantOffset, constantBound);
      }
    }

    // maxCount only ever grows, and never beyond the array size
    bindings.maxCount = std::clamp(StartSlot + NumBuffers,
      bindings.maxCount, uint32_t(bindings.buffers.size()));
  }


  // Binds shader resource views to one shader stage.
  //
  // StartSlot:    first slot to bind to
  // NumResources: number of consecutive slots to bind
  // ppResources:  one view per slot; entries may be null
  //
  // Slots whose view does not change are skipped. A view for which
  // TestSrvHazards reports a conflict is replaced with a null
  // binding. The slot indices are not range-checked here.
  template template void D3D11CommonContext::SetShaderResources(
          UINT                              StartSlot,
          UINT                              NumResources,
          ID3D11ShaderResourceView* const*  ppResources) {
    auto& bindings = m_state.srv[ShaderStage];
    uint32_t slotId = computeSrvBinding(ShaderStage, StartSlot);

    for (uint32_t i = 0; i < NumResources; i++) {
      auto resView = static_cast(ppResources[i]);

      if (bindings.views[StartSlot + i] != resView) {
        if (likely(resView != nullptr)) {
          if (unlikely(resView->TestHazards())) {
            if (TestSrvHazards(resView))
              resView = nullptr;

            // Only set if necessary, but don't reset it on every
            // bind as this would be more expensive than a few
            // redundant checks in OMSetRenderTargets and friends.
            bindings.hazardous.set(StartSlot + i, resView);
          }
        }

        bindings.views[StartSlot + i] = resView;
        BindShaderResource(slotId + i, resView);
      }
    }

    // maxCount only ever grows, and never beyond the array size
    bindings.maxCount = std::clamp(StartSlot + NumResources,
      bindings.maxCount, uint32_t(bindings.views.size()));
  }


  // Binds samplers to one shader stage.
  //
  // StartSlot:   first slot to bind to
  // NumSamplers: number of consecutive slots to bind
  // ppSamplers:  one sampler per slot
  //
  // Slots whose sampler does not change are skipped.
  // The slot indices are not range-checked here.
  template template void D3D11CommonContext::SetSamplers(
          UINT                              StartSlot,
          UINT                              NumSamplers,
          ID3D11SamplerState* const*        ppSamplers) {
    auto& bindings = m_state.samplers[ShaderStage];
    uint32_t slotId = computeSamplerBinding(ShaderStage, StartSlot);

    for (uint32_t i = 0; i < NumSamplers; i++) {
      auto sampler = static_cast(ppSamplers[i]);

      if (bindings.samplers[StartSlot + i] != sampler) {
        bindings.samplers[StartSlot + i] = sampler;
        BindSampler(slotId + i, sampler);
      }
    }

    // maxCount only ever grows, and never beyond the array size
    bindings.maxCount = std::clamp(StartSlot + NumSamplers,
      bindings.maxCount, uint32_t(bindings.samplers.size()));
  }


  template void D3D11CommonContext::SetRenderTargetsAndUnorderedAccessViews(
          UINT                              NumRTVs,
          ID3D11RenderTargetView* const*    ppRenderTargetViews,
          ID3D11DepthStencilView*           pDepthStencilView,
          UINT                              UAVStartSlot,
          UINT                              NumUAVs,
          ID3D11UnorderedAccessView* const* ppUnorderedAccessViews,
    const UINT*                             pUAVInitialCounts) {
    if (TestRtvUavHazards(NumRTVs, ppRenderTargetViews, NumUAVs, ppUnorderedAccessViews))
      return;

    bool needsUpdate = false;

    if (likely(NumRTVs != D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL)) {
      // Native D3D11 does not change the render targets if
      // the parameters passed to this method are invalid.
      if (!ValidateRenderTargets(NumRTVs, ppRenderTargetViews, pDepthStencilView))
        return;

      for (uint32_t i = 0; i < m_state.om.rtvs.size(); i++) {
        auto rtv = i < NumRTVs ?
static_cast(ppRenderTargetViews[i])
          : nullptr;

        if (m_state.om.rtvs[i] != rtv) {
          m_state.om.rtvs[i] = rtv;
          needsUpdate = true;
          ResolveOmSrvHazards(rtv);

          // If the UAVs are replaced as well, they have already been
          // checked against the new render targets up front.
          if (NumUAVs == D3D11_KEEP_UNORDERED_ACCESS_VIEWS)
            ResolveOmUavHazards(rtv);
        }
      }

      auto dsv = static_cast(pDepthStencilView);

      if (m_state.om.dsv != dsv) {
        m_state.om.dsv = dsv;
        needsUpdate = true;
        ResolveOmSrvHazards(dsv);
      }

      m_state.om.maxRtv = NumRTVs;
    }

    if (unlikely(NumUAVs || m_state.om.maxUav)) {
      uint32_t uavSlotId = computeUavBinding (DxbcProgramType::PixelShader, 0);
      uint32_t ctrSlotId = computeUavCounterBinding(DxbcProgramType::PixelShader, 0);

      if (likely(NumUAVs != D3D11_KEEP_UNORDERED_ACCESS_VIEWS)) {
        uint32_t newMaxUav = NumUAVs ? UAVStartSlot + NumUAVs : 0;
        uint32_t oldMaxUav = std::exchange(m_state.om.maxUav, newMaxUav);

        // Iterate over the old range as well so that slots which
        // are outside of the new range get unbound.
        for (uint32_t i = 0; i < std::max(oldMaxUav, newMaxUav); i++) {
          D3D11UnorderedAccessView* uav = nullptr;
          uint32_t ctr = ~0u;

          if (i >= UAVStartSlot && i < UAVStartSlot + NumUAVs) {
            uav = static_cast(ppUnorderedAccessViews[i - UAVStartSlot]);
            ctr = pUAVInitialCounts ? pUAVInitialCounts[i - UAVStartSlot] : ~0u;
          }

          // Rebind even if the view is unchanged when a counter
          // value other than ~0u was passed in.
          if (m_state.om.uavs[i] != uav || ctr != ~0u) {
            m_state.om.uavs[i] = uav;

            BindUnorderedAccessView(
              uavSlotId + i, uav,
              ctrSlotId + i, ctr);

            ResolveOmSrvHazards(uav);

            if (NumRTVs == D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL)
              needsUpdate |= ResolveOmRtvHazards(uav);
          }
        }
      }
    }

    if (needsUpdate)
      BindFramebuffer();
  }


  // Sets the argument and count buffers used for indirect draws.
  // The backend is only updated if either buffer changes.
  //
  // pBufferForArgs:  buffer containing draw arguments
  // pBufferForCount: buffer containing the draw count
  template void D3D11CommonContext::SetDrawBuffers(
          ID3D11Buffer*                     pBufferForArgs,
          ID3D11Buffer*                     pBufferForCount) {
    auto argBuffer = static_cast(pBufferForArgs);
    auto cntBuffer = static_cast(pBufferForCount);

    if (m_state.id.argBuffer != argBuffer
     || m_state.id.cntBuffer != cntBuffer) {
      m_state.id.argBuffer = argBuffer;
      m_state.id.cntBuffer = cntBuffer;

      BindDrawBuffers(argBuffer, cntBuffer);
    }
  }


  // Checks a set of render target views and UAVs that are about to
  // be bound together for overlaps among themselves.
  //
  // NumRTVs / NumUAVs: view counts; the respective KEEP constants are
  //                    treated as zero, so that set is not inspected.
  // ppRTVs / ppUAVs:   the views to check; null entries are skipped.
  //
  // Returns true if any two RTVs overlap, if any two UAVs overlap,
  // or if an RTV with the D3D11_BIND_UNORDERED_ACCESS bind flag
  // overlaps one of the UAVs.
  template bool D3D11CommonContext::TestRtvUavHazards(
          UINT                              NumRTVs,
          ID3D11RenderTargetView* const*    ppRTVs,
          UINT                              NumUAVs,
          ID3D11UnorderedAccessView* const* ppUAVs) {
    if (NumRTVs == D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL) NumRTVs = 0;
    if (NumUAVs == D3D11_KEEP_UNORDERED_ACCESS_VIEWS) NumUAVs = 0;

    for (uint32_t i = 0; i < NumRTVs; i++) {
      auto rtv = static_cast(ppRTVs[i]);

      if (!rtv)
        continue;

      // Compare against all previous render targets
      for (uint32_t j = 0; j < i; j++) {
        if (CheckViewOverlap(rtv, static_cast(ppRTVs[j])))
          return true;
      }

      if (rtv->HasBindFlag(D3D11_BIND_UNORDERED_ACCESS)) {
        for (uint32_t j = 0; j < NumUAVs; j++) {
          if (CheckViewOverlap(rtv, static_cast(ppUAVs[j])))
            return true;
        }
      }
    }

    for (uint32_t i = 0; i < NumUAVs; i++) {
      auto uav = static_cast(ppUAVs[i]);

      if (!uav)
        continue;

      // Compare against all previous UAVs
      for (uint32_t j = 0; j < i; j++) {
        if (CheckViewOverlap(uav, static_cast(ppUAVs[j])))
          return true;
      }
    }

    return false;
  }


  // Checks whether a shader resource view overlaps any view that is
  // currently bound for writing. For the compute stage, the view is
  // tested against the bound compute UAVs. For all other stages, it
  // is tested against the bound depth-stencil view, render target
  // views and output merger UAVs.
  //
  // Returns true as soon as the first overlap is found.
  template template bool D3D11CommonContext::TestSrvHazards(
          D3D11ShaderResourceView*          pView) {
    bool hazard = false;

    if (ShaderStage == DxbcProgramType::ComputeShader) {
      int32_t uav = m_state.uav.mask.findNext(0);

      while (uav >= 0 && !hazard) {
        hazard = CheckViewOverlap(pView, m_state.uav.views[uav].ptr());
        uav = m_state.uav.mask.findNext(uav + 1);
      }
    } else {
      hazard = CheckViewOverlap(pView, m_state.om.dsv.ptr());

      for (uint32_t i = 0; !hazard && i < m_state.om.maxRtv; i++)
        hazard = CheckViewOverlap(pView, m_state.om.rtvs[i].ptr());

      for (uint32_t i = 0; !hazard && i < m_state.om.maxUav; i++)
        hazard = CheckViewOverlap(pView, m_state.om.uavs[i].ptr());
    }

    return hazard;
  }


  // Tracks the sequence number for a resource of unknown type.
  // For textures, every subresource is tracked; anything that is
  // not a texture is treated as a buffer. Resources for which
  // HasSequenceNumber returns false are ignored, as is a null pointer.
  template void D3D11CommonContext::TrackResourceSequenceNumber(
          ID3D11Resource*                   pResource) {
    if (!pResource)
      return;

    D3D11CommonTexture* texture = GetCommonTexture(pResource);

    if (texture) {
      if (texture->HasSequenceNumber()) {
        for (uint32_t i = 0; i < texture->CountSubresources(); i++)
          GetTypedContext()->TrackTextureSequenceNumber(texture, i);
      }
    } else {
      D3D11Buffer* buffer = static_cast(pResource);

      if (buffer->HasSequenceNumber())
        GetTypedContext()->TrackBufferSequenceNumber(buffer);
    }
  }


  template void D3D11CommonContext::UpdateBuffer(
          D3D11Buffer*                      pDstBuffer,
          UINT                              Offset,
          UINT                              Length,
    const void*                             pSrcData) {
    DxvkBufferSlice bufferSlice = pDstBuffer->GetBufferSlice(Offset, Length);

    if (Length <= 1024 && !(Offset & 0x3) && !(Length & 0x3)) {
      // The backend has special code paths for small buffer updates,
      // however both offset and size must be aligned to four bytes.
DxvkDataSlice dataSlice = AllocUpdateBufferSlice(Length); std::memcpy(dataSlice.ptr(), pSrcData, Length); EmitCs([ cDataBuffer = std::move(dataSlice), cBufferSlice = std::move(bufferSlice) ] (DxvkContext* ctx) { ctx->updateBuffer( cBufferSlice.buffer(), cBufferSlice.offset(), cBufferSlice.length(), cDataBuffer.ptr()); }); } else { // Otherwise, to avoid large data copies on the CS thread, // write directly to a staging buffer and dispatch a copy DxvkBufferSlice stagingSlice = AllocStagingBuffer(Length); std::memcpy(stagingSlice.mapPtr(0), pSrcData, Length); EmitCs([ cStagingSlice = std::move(stagingSlice), cBufferSlice = std::move(bufferSlice) ] (DxvkContext* ctx) { ctx->copyBuffer( cBufferSlice.buffer(), cBufferSlice.offset(), cStagingSlice.buffer(), cStagingSlice.offset(), cBufferSlice.length()); }); } if (pDstBuffer->HasSequenceNumber()) GetTypedContext()->TrackBufferSequenceNumber(pDstBuffer); } template void D3D11CommonContext::UpdateTexture( D3D11CommonTexture* pDstTexture, UINT DstSubresource, const D3D11_BOX* pDstBox, const void* pSrcData, UINT SrcRowPitch, UINT SrcDepthPitch) { if (DstSubresource >= pDstTexture->CountSubresources()) return; VkFormat packedFormat = pDstTexture->GetPackedFormat(); auto formatInfo = lookupFormatInfo(packedFormat); auto subresource = pDstTexture->GetSubresourceFromIndex( formatInfo->aspectMask, DstSubresource); VkExtent3D mipExtent = pDstTexture->MipLevelExtent(subresource.mipLevel); VkOffset3D offset = { 0, 0, 0 }; VkExtent3D extent = mipExtent; if (pDstBox != nullptr) { if (pDstBox->left >= pDstBox->right || pDstBox->top >= pDstBox->bottom || pDstBox->front >= pDstBox->back) return; // no-op, but legal offset.x = pDstBox->left; offset.y = pDstBox->top; offset.z = pDstBox->front; extent.width = pDstBox->right - pDstBox->left; extent.height = pDstBox->bottom - pDstBox->top; extent.depth = pDstBox->back - pDstBox->front; } if (!util::isBlockAligned(offset, extent, formatInfo->blockSize, mipExtent)) return; auto stagingSlice = 
AllocStagingBuffer(util::computeImageDataSize(packedFormat, extent)); util::packImageData(stagingSlice.mapPtr(0), pSrcData, SrcRowPitch, SrcDepthPitch, 0, 0, pDstTexture->GetVkImageType(), extent, 1, formatInfo, formatInfo->aspectMask); UpdateImage(pDstTexture, &subresource, offset, extent, std::move(stagingSlice)); } template void D3D11CommonContext::UpdateImage( D3D11CommonTexture* pDstTexture, const VkImageSubresource* pDstSubresource, VkOffset3D DstOffset, VkExtent3D DstExtent, DxvkBufferSlice StagingBuffer) { bool dstIsImage = pDstTexture->GetMapMode() != D3D11_COMMON_TEXTURE_MAP_MODE_STAGING; uint32_t dstSubresource = D3D11CalcSubresource(pDstSubresource->mipLevel, pDstSubresource->arrayLayer, pDstTexture->Desc()->MipLevels); if (dstIsImage) { EmitCs([ cDstImage = pDstTexture->GetImage(), cDstLayers = vk::makeSubresourceLayers(*pDstSubresource), cDstOffset = DstOffset, cDstExtent = DstExtent, cStagingSlice = std::move(StagingBuffer), cPackedFormat = pDstTexture->GetPackedFormat() ] (DxvkContext* ctx) { if (cDstLayers.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { ctx->copyBufferToImage(cDstImage, cDstLayers, cDstOffset, cDstExtent, cStagingSlice.buffer(), cStagingSlice.offset(), 0, 0); } else { ctx->copyPackedBufferToDepthStencilImage(cDstImage, cDstLayers, VkOffset2D { cDstOffset.x, cDstOffset.y }, VkExtent2D { cDstExtent.width, cDstExtent.height }, cStagingSlice.buffer(), cStagingSlice.offset(), VkOffset2D { 0, 0 }, VkExtent2D { cDstExtent.width, cDstExtent.height }, cPackedFormat); } }); } else { // If the destination image is backed only by a buffer, we need to use // the packed buffer copy function which does not know about planes and // format metadata, so deal with it manually here. 
VkExtent3D dstMipExtent = pDstTexture->MipLevelExtent(pDstSubresource->mipLevel); auto dstFormat = pDstTexture->GetPackedFormat(); auto dstFormatInfo = lookupFormatInfo(dstFormat); uint32_t planeCount = 1; if (dstFormatInfo->flags.test(DxvkFormatFlag::MultiPlane)) planeCount = vk::getPlaneCount(dstFormatInfo->aspectMask); // The source data isn't stored in an image so we'll also need to // track the offset for that while iterating over the planes. VkDeviceSize srcPlaneOffset = 0; for (uint32_t i = 0; i < planeCount; i++) { VkImageAspectFlags dstAspectMask = dstFormatInfo->aspectMask; VkDeviceSize elementSize = dstFormatInfo->elementSize; VkExtent3D blockSize = dstFormatInfo->blockSize; if (dstFormatInfo->flags.test(DxvkFormatFlag::MultiPlane)) { dstAspectMask = vk::getPlaneAspect(i); auto plane = &dstFormatInfo->planes[i]; blockSize.width *= plane->blockSize.width; blockSize.height *= plane->blockSize.height; elementSize = plane->elementSize; } VkExtent3D blockCount = util::computeBlockCount(DstExtent, blockSize); EmitCs([ cDstBuffer = pDstTexture->GetMappedBuffer(dstSubresource), cDstStart = pDstTexture->GetSubresourceLayout(dstAspectMask, dstSubresource).Offset, cDstOffset = util::computeBlockOffset(DstOffset, blockSize), cDstSize = util::computeBlockCount(dstMipExtent, blockSize), cDstExtent = blockCount, cSrcBuffer = StagingBuffer.buffer(), cSrcStart = StagingBuffer.offset() + srcPlaneOffset, cPixelSize = elementSize ] (DxvkContext* ctx) { ctx->copyPackedBufferImage( cDstBuffer, cDstStart, cDstOffset, cDstSize, cSrcBuffer, cSrcStart, VkOffset3D(), cDstExtent, cDstExtent, cPixelSize); }); srcPlaneOffset += util::flattenImageExtent(blockCount) * elementSize; } } if (pDstTexture->HasSequenceNumber()) GetTypedContext()->TrackTextureSequenceNumber(pDstTexture, dstSubresource); } template void D3D11CommonContext::UpdateResource( ID3D11Resource* pDstResource, UINT DstSubresource, const D3D11_BOX* pDstBox, const void* pSrcData, UINT SrcRowPitch, UINT SrcDepthPitch, 
UINT CopyFlags) { auto context = static_cast(this); D3D10DeviceLock lock = context->LockContext(); if (!pDstResource) return; // We need a different code path for buffers D3D11_RESOURCE_DIMENSION resourceType; pDstResource->GetType(&resourceType); if (likely(resourceType == D3D11_RESOURCE_DIMENSION_BUFFER)) { const auto bufferResource = static_cast(pDstResource); uint64_t bufferSize = bufferResource->Desc()->ByteWidth; // Provide a fast path for mapped buffer updates since some // games use UpdateSubresource to update constant buffers. if (likely(bufferResource->GetMapMode() == D3D11_COMMON_BUFFER_MAP_MODE_DIRECT) && likely(!pDstBox)) { context->UpdateMappedBuffer(bufferResource, 0, bufferSize, pSrcData, 0); return; } // Validate buffer range to update uint64_t offset = 0; uint64_t length = bufferSize; if (pDstBox) { offset = pDstBox->left; length = pDstBox->right - offset; } if (unlikely(offset + length > bufferSize)) return; // Still try to be fast if a box is provided but we update the full buffer if (likely(bufferResource->GetMapMode() == D3D11_COMMON_BUFFER_MAP_MODE_DIRECT)) { CopyFlags &= D3D11_COPY_DISCARD | D3D11_COPY_NO_OVERWRITE; if (likely(length == bufferSize) || unlikely(CopyFlags != 0)) { context->UpdateMappedBuffer(bufferResource, offset, length, pSrcData, CopyFlags); return; } } // Otherwise we can't really do anything fancy, so just do a GPU copy context->UpdateBuffer(bufferResource, offset, length, pSrcData); } else { D3D11CommonTexture* textureResource = GetCommonTexture(pDstResource); context->UpdateTexture(textureResource, DstSubresource, pDstBox, pSrcData, SrcRowPitch, SrcDepthPitch); } } template bool D3D11CommonContext::ValidateRenderTargets( UINT NumViews, ID3D11RenderTargetView* const* ppRenderTargetViews, ID3D11DepthStencilView* pDepthStencilView) { Rc refView; VkExtent3D dsvExtent = { 0u, 0u, 0u }; VkExtent3D rtvExtent = { 0u, 0u, 0u }; if (pDepthStencilView != nullptr) { refView = static_cast( pDepthStencilView)->GetImageView(); 
dsvExtent = refView->mipLevelExtent(0); } for (uint32_t i = 0; i < NumViews; i++) { if (ppRenderTargetViews[i] != nullptr) { auto curView = static_cast( ppRenderTargetViews[i])->GetImageView(); if (!rtvExtent.width) rtvExtent = curView->mipLevelExtent(0); if (refView != nullptr) { // Render target views must all have the same sample count, // layer count, and type. The size can mismatch under certain // conditions, the D3D11 documentation is wrong here. if (curView->info().type != refView->info().type || curView->info().numLayers != refView->info().numLayers) return false; if (curView->imageInfo().sampleCount != refView->imageInfo().sampleCount) return false; // Color targets must all be the same size VkExtent3D curExtent = curView->mipLevelExtent(0); if (curExtent.width != rtvExtent.width || curExtent.height != rtvExtent.height) return false; } else { // Set reference view. All remaining views // must be compatible to the reference view. refView = curView; } } } // Based on testing, the depth-stencil target is allowed // to be larger than all color targets, but not smaller if (rtvExtent.width && dsvExtent.width) { if (rtvExtent.width > dsvExtent.width || rtvExtent.height > dsvExtent.height) return false; } return true; } template void D3D11CommonContext::InitDefaultPrimitiveTopology( DxvkInputAssemblyState* pIaState) { pIaState->primitiveTopology = VK_PRIMITIVE_TOPOLOGY_MAX_ENUM; pIaState->primitiveRestart = VK_FALSE; pIaState->patchVertexCount = 0; } template void D3D11CommonContext::InitDefaultRasterizerState( DxvkRasterizerState* pRsState) { pRsState->polygonMode = VK_POLYGON_MODE_FILL; pRsState->cullMode = VK_CULL_MODE_BACK_BIT; pRsState->frontFace = VK_FRONT_FACE_CLOCKWISE; pRsState->depthClipEnable = VK_TRUE; pRsState->depthBiasEnable = VK_FALSE; pRsState->conservativeMode = VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT; pRsState->sampleCount = 0; pRsState->flatShading = VK_FALSE; } template void D3D11CommonContext::InitDefaultDepthStencilState( 
DxvkDepthStencilState* pDsState) { VkStencilOpState stencilOp; stencilOp.failOp = VK_STENCIL_OP_KEEP; stencilOp.passOp = VK_STENCIL_OP_KEEP; stencilOp.depthFailOp = VK_STENCIL_OP_KEEP; stencilOp.compareOp = VK_COMPARE_OP_ALWAYS; stencilOp.compareMask = D3D11_DEFAULT_STENCIL_READ_MASK; stencilOp.writeMask = D3D11_DEFAULT_STENCIL_WRITE_MASK; stencilOp.reference = 0; pDsState->enableDepthTest = VK_TRUE; pDsState->enableDepthWrite = VK_TRUE; pDsState->enableStencilTest = VK_FALSE; pDsState->depthCompareOp = VK_COMPARE_OP_LESS; pDsState->stencilOpFront = stencilOp; pDsState->stencilOpBack = stencilOp; } template void D3D11CommonContext::InitDefaultBlendState( DxvkBlendMode* pCbState, DxvkLogicOpState* pLoState, DxvkMultisampleState* pMsState, UINT SampleMask) { pCbState->enableBlending = VK_FALSE; pCbState->colorSrcFactor = VK_BLEND_FACTOR_ONE; pCbState->colorDstFactor = VK_BLEND_FACTOR_ZERO; pCbState->colorBlendOp = VK_BLEND_OP_ADD; pCbState->alphaSrcFactor = VK_BLEND_FACTOR_ONE; pCbState->alphaDstFactor = VK_BLEND_FACTOR_ZERO; pCbState->alphaBlendOp = VK_BLEND_OP_ADD; pCbState->writeMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; pLoState->enableLogicOp = VK_FALSE; pLoState->logicOp = VK_LOGIC_OP_NO_OP; pMsState->sampleMask = SampleMask; pMsState->enableAlphaToCoverage = VK_FALSE; } // Explicitly instantiate here template class D3D11CommonContext; template class D3D11CommonContext; }