#include "d3d11_cmdlist.h"
#include "d3d11_context_imm.h"
#include "d3d11_device.h"
#include "d3d11_fence.h"
#include "d3d11_texture.h"

#include "../util/util_win32_compat.h"

// NOTE(review): the template argument lists on Rc<>, Com<> and the C++
// casts in this file were missing from the text under review and have
// been filled in based on how each value is used. Confirm them against
// the declarations in the class headers before merging.

constexpr static uint32_t MinFlushIntervalUs = 750;
constexpr static uint32_t IncFlushIntervalUs = 250;
constexpr static uint32_t MaxPendingSubmits  = 6;

namespace dxvk {

  // Sets up the CS thread, submission fence, multithread lock and video
  // context, then records the initial context setup (command list and
  // barrier control options) on the CS thread and resets all state.
  D3D11ImmediateContext::D3D11ImmediateContext(
          D3D11Device*    pParent,
    const Rc<DxvkDevice>& Device)
  : D3D11CommonContext(pParent, Device, 0, DxvkCsChunkFlag::SingleUse),
    m_csThread(Device, Device->createContext(DxvkContextType::Primary)),
    m_maxImplicitDiscardSize(pParent->GetOptions()->maxImplicitDiscardSize),
    m_submissionFence(new sync::CallbackFence()),
    m_multithread(this, false, pParent->GetOptions()->enableContextLock),
    m_videoContext(this, Device) {
    EmitCs([
      cDevice                 = m_device,
      cRelaxedBarriers        = pParent->GetOptions()->relaxedBarriers,
      cIgnoreGraphicsBarriers = pParent->GetOptions()->ignoreGraphicsBarriers
    ] (DxvkContext* ctx) {
      ctx->beginRecording(cDevice->createCommandList());

      DxvkBarrierControlFlags barrierControl;

      if (cRelaxedBarriers)
        barrierControl.set(DxvkBarrierControl::IgnoreWriteAfterWrite);

      if (cIgnoreGraphicsBarriers)
        barrierControl.set(DxvkBarrierControl::IgnoreGraphicsBarriers);

      ctx->setBarrierControl(barrierControl);
    });

    ClearState();
  }


  // Flushes all pending work and waits for both the CS thread and
  // the device to finish, unless the module is being detached.
  D3D11ImmediateContext::~D3D11ImmediateContext() {
    // Avoids hanging when in this state, see comment
    // in DxvkDevice::~DxvkDevice.
    if (this_thread::isInModuleDetachment())
      return;

    ExecuteFlush(GpuFlushType::ExplicitFlush, nullptr, true);
    SynchronizeCsThread(DxvkCsThread::SynchronizeAll);
    SynchronizeDevice();
  }


  // Hands out the multithread and video context interfaces owned by
  // this object; every other IID is forwarded to the base context.
  HRESULT STDMETHODCALLTYPE D3D11ImmediateContext::QueryInterface(REFIID riid, void** ppvObject) {
    if (riid == __uuidof(ID3D10Multithread)) {
      *ppvObject = ref(&m_multithread);
      return S_OK;
    }

    if (riid == __uuidof(ID3D11VideoContext)) {
      *ppvObject = ref(&m_videoContext);
      return S_OK;
    }

    return D3D11CommonContext::QueryInterface(riid, ppvObject);
  }


  // Retrieves query data. Returns E_INVALIDARG for a null query, for a
  // non-zero DataSize without a buffer, or for a DataSize that does not
  // match the query. If the result is not ready yet (S_FALSE), a flush
  // is considered so that the GPU keeps making progress.
  HRESULT STDMETHODCALLTYPE D3D11ImmediateContext::GetData(
          ID3D11Asynchronous*               pAsync,
          void*                             pData,
          UINT                              DataSize,
          UINT                              GetDataFlags) {
    if (!pAsync || (DataSize && !pData))
      return E_INVALIDARG;

    // Check whether the data size is actually correct
    if (DataSize && DataSize != pAsync->GetDataSize())
      return E_INVALIDARG;

    // Passing a non-null pData is actually allowed if
    // DataSize is 0, but we should ignore that pointer
    pData = DataSize ? pData : nullptr;

    // Get query status directly from the query object
    auto query = static_cast<D3D11Query*>(pAsync);
    HRESULT hr = query->GetData(pData, GetDataFlags);

    // If we're likely going to spin on the asynchronous object,
    // flush the context so that we're keeping the GPU busy.
    if (hr == S_FALSE) {
      // Don't mark the event query as stalling if the app does
      // not intend to spin on it. This reduces flushes on End.
      if (!(GetDataFlags & D3D11_ASYNC_GETDATA_DONOTFLUSH))
        query->NotifyStall();

      // Ignore the DONOTFLUSH flag here as some games will spin
      // on queries without ever flushing the context otherwise.
      D3D10DeviceLock lock = LockContext();
      ConsiderFlush(GpuFlushType::ImplicitSynchronization);
    }

    return hr;
  }


  // Begins a query on the CS thread. Does nothing if pAsync
  // is null or if the query object rejects the Begin call.
  void STDMETHODCALLTYPE D3D11ImmediateContext::Begin(ID3D11Asynchronous* pAsync) {
    D3D10DeviceLock lock = LockContext();

    if (unlikely(!pAsync))
      return;

    auto query = static_cast<D3D11Query*>(pAsync);

    if (unlikely(!query->DoBegin()))
      return;

    EmitCs([cQuery = Com<D3D11Query, false>(query)]
    (DxvkContext* ctx) {
      cQuery->Begin(ctx);
    });
  }


  // Ends a query on the CS thread. If DoEnd() reports failure, a Begin
  // command is emitted first so that the End command has a match. For
  // queries that track stalls, a flush is executed or considered.
  void STDMETHODCALLTYPE D3D11ImmediateContext::End(ID3D11Asynchronous* pAsync) {
    D3D10DeviceLock lock = LockContext();

    if (unlikely(!pAsync))
      return;

    auto query = static_cast<D3D11Query*>(pAsync);

    if (unlikely(!query->DoEnd())) {
      EmitCs([cQuery = Com<D3D11Query, false>(query)]
      (DxvkContext* ctx) {
        cQuery->Begin(ctx);
      });
    }

    EmitCs([cQuery = Com<D3D11Query, false>(query)]
    (DxvkContext* ctx) {
      cQuery->End(ctx);
    });

    if (unlikely(query->TrackStalls())) {
      query->NotifyEnd();

      if (query->IsStalling())
        ExecuteFlush(GpuFlushType::ImplicitSynchronization, nullptr, false);
      else if (query->IsEvent())
        ConsiderFlush(GpuFlushType::ImplicitStrongHint);
    }
  }


  // Explicit flush requested by the application.
  void STDMETHODCALLTYPE D3D11ImmediateContext::Flush() {
    D3D10DeviceLock lock = LockContext();

    ExecuteFlush(GpuFlushType::ExplicitFlush, nullptr, true);
  }


  // Explicit flush that additionally signals hEvent once the
  // submission completes. ContextType is not used here.
  void STDMETHODCALLTYPE D3D11ImmediateContext::Flush1(
          D3D11_CONTEXT_TYPE          ContextType,
          HANDLE                      hEvent) {
    D3D10DeviceLock lock = LockContext();

    ExecuteFlush(GpuFlushType::ExplicitFlush, hEvent, true);
  }


  // Records a fence signal operation and flushes so that it gets
  // submitted. Returns E_INVALIDARG if pFence is null.
  HRESULT STDMETHODCALLTYPE D3D11ImmediateContext::Signal(
          ID3D11Fence*                pFence,
          UINT64                      Value) {
    D3D10DeviceLock lock = LockContext();
    auto fence = static_cast<D3D11Fence*>(pFence);

    if (!fence)
      return E_INVALIDARG;

    EmitCs([
      cFence = fence->GetFence(),
      cValue = Value
    ] (DxvkContext* ctx) {
      ctx->signalFence(cFence, cValue);
    });

    ExecuteFlush(GpuFlushType::ExplicitFlush, nullptr, true);
    return S_OK;
  }


  // Flushes previously recorded commands, then records a fence wait
  // operation. Returns E_INVALIDARG if pFence is null.
  HRESULT STDMETHODCALLTYPE D3D11ImmediateContext::Wait(
          ID3D11Fence*                pFence,
          UINT64                      Value) {
    D3D10DeviceLock lock = LockContext();
    auto fence = static_cast<D3D11Fence*>(pFence);

    if (!fence)
      return E_INVALIDARG;

    ExecuteFlush(GpuFlushType::ExplicitFlush, nullptr, true);

    EmitCs([
      cFence = fence->GetFence(),
      cValue = Value
    ] (DxvkContext* ctx) {
      ctx->waitFence(cFence, cValue);
    });

    return S_OK;
  }


  // Dispatches the chunks of a recorded command list to the CS thread
  // and afterwards either restores or resets the immediate context's
  // own state, depending on RestoreContextState.
  void STDMETHODCALLTYPE D3D11ImmediateContext::ExecuteCommandList(
          ID3D11CommandList*  pCommandList,
          BOOL                RestoreContextState) {
    D3D10DeviceLock lock = LockContext();

    auto commandList = static_cast<D3D11CommandList*>(pCommandList);

    // Clear state so that the command list can't observe any
    // current context state. The command list itself will clean
    // up after execution to ensure that no state changes done
    // by the command list are visible to the immediate context.
    ResetCommandListState();

    // Flush any outstanding commands so that
    // we don't mess up the execution order
    FlushCsChunk();

    // As an optimization, flush everything if the
    // number of pending draw calls is high enough.
    ConsiderFlush(GpuFlushType::ImplicitWeakHint);

    // Dispatch command list to the CS thread
    commandList->EmitToCsThread([this] (DxvkCsChunkRef&& chunk, GpuFlushType flushType) {
      EmitCsChunk(std::move(chunk));

      // Return the sequence number from before the flush since
      // that is actually going to be needed for resource tracking
      uint64_t csSeqNum = m_csSeqNum;

      // Consider a flush after every chunk in case the app
      // submits a very large command list or the GPU is idle
      ConsiderFlush(flushType);
      return csSeqNum;
    });

    // Restore the immediate context's state
    if (RestoreContextState)
      RestoreCommandListState();
    else
      ResetContextState();
  }


  // Not valid on an immediate context: clears the output
  // pointer, logs an error and returns DXGI_ERROR_INVALID_CALL.
  HRESULT STDMETHODCALLTYPE D3D11ImmediateContext::FinishCommandList(
          BOOL                RestoreDeferredContextState,
          ID3D11CommandList   **ppCommandList) {
    InitReturnPtr(ppCommandList);

    Logger::err("D3D11: FinishCommandList called on immediate context");
    return DXGI_ERROR_INVALID_CALL;
  }


  // Maps a resource for CPU access by dispatching to MapBuffer or
  // MapImage based on the resource dimension. On failure, the output
  // structure is zeroed if the caller provided one.
  HRESULT STDMETHODCALLTYPE D3D11ImmediateContext::Map(
          ID3D11Resource*             pResource,
          UINT                        Subresource,
          D3D11_MAP                   MapType,
          UINT                        MapFlags,
          D3D11_MAPPED_SUBRESOURCE*   pMappedResource) {
    D3D10DeviceLock lock = LockContext();

    if (unlikely(!pResource))
      return E_INVALIDARG;

    D3D11_RESOURCE_DIMENSION resourceDim = D3D11_RESOURCE_DIMENSION_UNKNOWN;
    pResource->GetType(&resourceDim);

    HRESULT hr;

    if (likely(resourceDim == D3D11_RESOURCE_DIMENSION_BUFFER)) {
      hr = MapBuffer(
        static_cast<D3D11Buffer*>(pResource),
        MapType, MapFlags, pMappedResource);
    } else {
      hr = MapImage(
        GetCommonTexture(pResource),
        Subresource, MapType, MapFlags,
        pMappedResource);
    }

    // pMappedResource can be null on the failure path: MapBuffer fails
    // with E_INVALIDARG exactly in that case, and MapImage accepts a
    // null pointer for some resources. Only clear it if it exists.
    if (unlikely(FAILED(hr)) && pMappedResource)
      *pMappedResource = D3D11_MAPPED_SUBRESOURCE();

    return hr;
  }


  // Unmaps a resource. Only mapped images require any work here.
  void STDMETHODCALLTYPE D3D11ImmediateContext::Unmap(
          ID3D11Resource*             pResource,
          UINT                        Subresource) {
    // Since it is very uncommon for images to be mapped compared
    // to buffers, we count the currently mapped images in order
    // to avoid a virtual method call in the common case.
    if (unlikely(m_mappedImageCount > 0)) {
      // Mirror the null check performed by Map
      if (unlikely(!pResource))
        return;

      D3D11_RESOURCE_DIMENSION resourceDim = D3D11_RESOURCE_DIMENSION_UNKNOWN;
      pResource->GetType(&resourceDim);

      if (resourceDim != D3D11_RESOURCE_DIMENSION_BUFFER) {
        D3D10DeviceLock lock = LockContext();
        UnmapImage(GetCommonTexture(pResource), Subresource);
      }
    }
  }


  // Maps a buffer. WRITE_DISCARD replaces the backing slice, and
  // WRITE_NO_OVERWRITE returns the current slice without waiting.
  // All other map types either wait for the GPU or, where possible,
  // replace the slice and copy the old contents over on the CPU.
  // Returns E_INVALIDARG if pMappedResource is null or the buffer is
  // not mappable, and DXGI_ERROR_WAS_STILL_DRAWING if WaitForResource
  // reports that the resource is still busy.
  HRESULT D3D11ImmediateContext::MapBuffer(
          D3D11Buffer*                pResource,
          D3D11_MAP                   MapType,
          UINT                        MapFlags,
          D3D11_MAPPED_SUBRESOURCE*   pMappedResource) {
    if (unlikely(!pMappedResource))
      return E_INVALIDARG;

    if (unlikely(pResource->GetMapMode() == D3D11_COMMON_BUFFER_MAP_MODE_NONE)) {
      Logger::err("D3D11: Cannot map a device-local buffer");
      return E_INVALIDARG;
    }

    VkDeviceSize bufferSize = pResource->Desc()->ByteWidth;

    if (likely(MapType == D3D11_MAP_WRITE_DISCARD)) {
      // Allocate a new backing slice for the buffer and set
      // it as the 'new' mapped slice. This assumes that the
      // only way to invalidate a buffer is by mapping it.
      auto physSlice = pResource->DiscardSlice();
      pMappedResource->pData      = physSlice.mapPtr;
      pMappedResource->RowPitch   = bufferSize;
      pMappedResource->DepthPitch = bufferSize;

      EmitCs([
        cBuffer      = pResource->GetBuffer(),
        cBufferSlice = physSlice
      ] (DxvkContext* ctx) {
        ctx->invalidateBuffer(cBuffer, cBufferSlice);
      });

      return S_OK;
    } else if (likely(MapType == D3D11_MAP_WRITE_NO_OVERWRITE)) {
      // Put this on a fast path without any extra checks since it's
      // a somewhat desired method to partially update large buffers
      DxvkBufferSliceHandle physSlice = pResource->GetMappedSlice();
      pMappedResource->pData      = physSlice.mapPtr;
      pMappedResource->RowPitch   = bufferSize;
      pMappedResource->DepthPitch = bufferSize;
      return S_OK;
    } else {
      // Quantum Break likes using MAP_WRITE on resources which would force
      // us to synchronize with the GPU multiple times per frame. In those
      // situations, if there are no pending GPU writes to the resource, we
      // can promote it to MAP_WRITE_DISCARD, but preserve the data by doing
      // a CPU copy from the previous buffer slice, to avoid the sync point.
      bool doInvalidatePreserve = false;

      auto buffer = pResource->GetBuffer();
      auto sequenceNumber = pResource->GetSequenceNumber();

      if (MapType != D3D11_MAP_READ && !MapFlags && bufferSize <= m_maxImplicitDiscardSize) {
        SynchronizeCsThread(sequenceNumber);

        bool hasWoAccess = buffer->isInUse(DxvkAccess::Write);
        bool hasRwAccess = buffer->isInUse(DxvkAccess::Read);

        if (hasRwAccess && !hasWoAccess) {
          // Uncached reads can be so slow that a GPU sync may actually be faster
          doInvalidatePreserve = buffer->memFlags() & VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
        }
      }

      if (doInvalidatePreserve) {
        auto prevSlice = pResource->GetMappedSlice();
        auto physSlice = pResource->DiscardSlice();

        EmitCs([
          cBuffer      = std::move(buffer),
          cBufferSlice = physSlice
        ] (DxvkContext* ctx) {
          ctx->invalidateBuffer(cBuffer, cBufferSlice);
        });

        std::memcpy(physSlice.mapPtr, prevSlice.mapPtr, physSlice.length);
        pMappedResource->pData      = physSlice.mapPtr;
        pMappedResource->RowPitch   = bufferSize;
        pMappedResource->DepthPitch = bufferSize;
        return S_OK;
      } else {
        if (!WaitForResource(buffer, sequenceNumber, MapType, MapFlags))
          return DXGI_ERROR_WAS_STILL_DRAWING;

        DxvkBufferSliceHandle physSlice = pResource->GetMappedSlice();
        pMappedResource->pData      = physSlice.mapPtr;
        pMappedResource->RowPitch   = bufferSize;
        pMappedResource->DepthPitch = bufferSize;
        return S_OK;
      }
    }
  }


  // Maps one subresource of a texture. Directly mapped images are
  // waited on and their memory returned as-is. For all other map
  // modes the mapped buffer is either waited on, replaced with a
  // fresh slice (optionally preserving its contents), or used as-is.
  // pMappedResource may be null for D3D11_USAGE_DEFAULT textures.
  // Returns E_INVALIDARG for unmappable images, out-of-range
  // subresources and invalid pMappedResource / usage combinations,
  // and DXGI_ERROR_WAS_STILL_DRAWING if the resource is still busy.
  HRESULT D3D11ImmediateContext::MapImage(
          D3D11CommonTexture*         pResource,
          UINT                        Subresource,
          D3D11_MAP                   MapType,
          UINT                        MapFlags,
          D3D11_MAPPED_SUBRESOURCE*   pMappedResource) {
    const Rc<DxvkImage>  mappedImage  = pResource->GetImage();
    const Rc<DxvkBuffer> mappedBuffer = pResource->GetMappedBuffer(Subresource);

    auto mapMode = pResource->GetMapMode();

    if (unlikely(mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_NONE)) {
      Logger::err("D3D11: Cannot map a device-local image");
      return E_INVALIDARG;
    }

    if (unlikely(Subresource >= pResource->CountSubresources()))
      return E_INVALIDARG;

    if (likely(pMappedResource != nullptr)) {
      // Resources with an unknown memory layout cannot return a pointer
      if (pResource->Desc()->Usage         == D3D11_USAGE_DEFAULT
       && pResource->Desc()->TextureLayout == D3D11_TEXTURE_LAYOUT_UNDEFINED)
        return E_INVALIDARG;
    } else {
      if (pResource->Desc()->Usage != D3D11_USAGE_DEFAULT)
        return E_INVALIDARG;
    }

    VkFormat packedFormat = m_parent->LookupPackedFormat(
      pResource->Desc()->Format, pResource->GetFormatMode()).Format;

    uint64_t sequenceNumber = pResource->GetSequenceNumber(Subresource);

    auto formatInfo = lookupFormatInfo(packedFormat);
    void* mapPtr;

    if (mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_DIRECT) {
      // Wait for the resource to become available. We do not
      // support image renaming, so stall on DISCARD instead.
      if (MapType == D3D11_MAP_WRITE_DISCARD)
        MapFlags &= ~D3D11_MAP_FLAG_DO_NOT_WAIT;

      if (MapType != D3D11_MAP_WRITE_NO_OVERWRITE) {
        if (!WaitForResource(mappedImage, sequenceNumber, MapType, MapFlags))
          return DXGI_ERROR_WAS_STILL_DRAWING;
      }

      // Query the subresource's memory layout and hope that
      // the application respects the returned pitch values.
      mapPtr = mappedImage->mapPtr(0);
    } else {
      constexpr uint32_t DoInvalidate = (1u << 0);
      constexpr uint32_t DoPreserve   = (1u << 1);
      constexpr uint32_t DoWait       = (1u << 2);
      uint32_t doFlags = 0;

      if (mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_BUFFER) {
        // If the image can be written by the GPU, we need to update the
        // mapped staging buffer to reflect the current image contents.
        if (pResource->Desc()->Usage == D3D11_USAGE_DEFAULT) {
          bool needsReadback = !pResource->NeedsDirtyRegionTracking();

          needsReadback |= MapType == D3D11_MAP_READ
                        || MapType == D3D11_MAP_READ_WRITE;

          if (needsReadback)
            ReadbackImageBuffer(pResource, Subresource);
        }
      }

      if (MapType == D3D11_MAP_READ) {
        // Reads will not change the image content, so we only need
        // to wait for the GPU to finish writing to the mapped buffer.
        doFlags = DoWait;
      } else if (MapType == D3D11_MAP_WRITE_DISCARD) {
        doFlags = DoInvalidate;

        // If we know for sure that the mapped buffer is currently not
        // in use by the GPU, we don't have to allocate a new slice.
        if (m_csThread.lastSequenceNumber() >= sequenceNumber && !mappedBuffer->isInUse(DxvkAccess::Read))
          doFlags = 0;
      } else if (mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_STAGING && (MapFlags & D3D11_MAP_FLAG_DO_NOT_WAIT)) {
        // Always respect DO_NOT_WAIT for mapped staging images
        doFlags = DoWait;
      } else if (MapType != D3D11_MAP_WRITE_NO_OVERWRITE || mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_BUFFER) {
        // Need to synchronize thread to determine pending GPU accesses
        SynchronizeCsThread(sequenceNumber);

        // Don't implicitly discard large buffers or buffers of images with
        // multiple subresources, as that is likely to cause memory issues.
        VkDeviceSize bufferSize = pResource->GetMappedSlice(Subresource).length;

        if (bufferSize >= m_maxImplicitDiscardSize || pResource->CountSubresources() > 1) {
          // Don't check access flags, WaitForResource will return
          // early anyway if the resource is currently in use
          doFlags = DoWait;
        } else if (mappedBuffer->isInUse(DxvkAccess::Write)) {
          // There are pending GPU writes, need to wait for those
          doFlags = DoWait;
        } else if (mappedBuffer->isInUse(DxvkAccess::Read)) {
          // All pending GPU accesses are reads, so the buffer data
          // is still current, and we can prevent GPU synchronization
          // by creating a new slice with an exact copy of the data.
          doFlags = DoInvalidate | DoPreserve;
        } else {
          // There are no pending accesses, so we don't need to wait
          doFlags = 0;
        }
      } else {
        // No need to synchronize staging resources with NO_OVERWRITE
        // since the buffer will be used directly.
        doFlags = 0;
      }

      if (doFlags & DoInvalidate) {
        DxvkBufferSliceHandle prevSlice = pResource->GetMappedSlice(Subresource);
        DxvkBufferSliceHandle physSlice = pResource->DiscardSlice(Subresource);

        EmitCs([
          cImageBuffer = mappedBuffer,
          cBufferSlice = physSlice
        ] (DxvkContext* ctx) {
          ctx->invalidateBuffer(cImageBuffer, cBufferSlice);
        });

        if (doFlags & DoPreserve)
          std::memcpy(physSlice.mapPtr, prevSlice.mapPtr, physSlice.length);

        mapPtr = physSlice.mapPtr;
      } else {
        if (doFlags & DoWait) {
          // We cannot respect DO_NOT_WAIT for buffer-mapped resources since
          // our internal copies need to be transparent to the application.
          if (mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_BUFFER)
            MapFlags &= ~D3D11_MAP_FLAG_DO_NOT_WAIT;

          // Wait for mapped buffer to become available
          if (!WaitForResource(mappedBuffer, sequenceNumber, MapType, MapFlags))
            return DXGI_ERROR_WAS_STILL_DRAWING;
        }

        mapPtr = pResource->GetMappedSlice(Subresource).mapPtr;
      }
    }

    // Mark the given subresource as mapped
    pResource->SetMapType(Subresource, MapType);

    if (pMappedResource) {
      auto layout = pResource->GetSubresourceLayout(formatInfo->aspectMask, Subresource);
      pMappedResource->pData      = reinterpret_cast<char*>(mapPtr) + layout.Offset;
      pMappedResource->RowPitch   = layout.RowPitch;
      pMappedResource->DepthPitch = layout.DepthPitch;
    }

    m_mappedImageCount += 1;
    return S_OK;
  }


  // Unmaps a texture subresource. D3D11_MAP(~0u) is the marker for
  // "not mapped". For buffer-mapped textures that were not mapped
  // read-only, the mapped data is uploaded to the image.
  void D3D11ImmediateContext::UnmapImage(
          D3D11CommonTexture*         pResource,
          UINT                        Subresource) {
    D3D11_MAP mapType = pResource->GetMapType(Subresource);
    pResource->SetMapType(Subresource, D3D11_MAP(~0u));

    if (mapType == D3D11_MAP(~0u))
      return;

    // Decrement mapped image counter only after making sure
    // the given subresource is actually mapped right now
    m_mappedImageCount -= 1;

    if ((mapType != D3D11_MAP_READ) && (pResource->GetMapMode() == D3D11_COMMON_TEXTURE_MAP_MODE_BUFFER)) {
      if (pResource->NeedsDirtyRegionTracking()) {
        for (uint32_t i = 0; i < pResource->GetDirtyRegionCount(Subresource); i++) {
          D3D11_COMMON_TEXTURE_REGION region = pResource->GetDirtyRegion(Subresource, i);
          UpdateDirtyImageRegion(pResource, Subresource, &region);
        }

        pResource->ClearDirtyRegions(Subresource);
      } else {
        UpdateDirtyImageRegion(pResource, Subresource, nullptr);
      }
    }
  }


  // Records a copy of the full subresource from the image into its
  // mapped buffer, using the packed depth-stencil path for combined
  // depth-stencil aspects.
  void D3D11ImmediateContext::ReadbackImageBuffer(
          D3D11CommonTexture*         pResource,
          UINT                        Subresource) {
    VkImageAspectFlags aspectMask = lookupFormatInfo(pResource->GetPackedFormat())->aspectMask;
    VkImageSubresource subresource = pResource->GetSubresourceFromIndex(aspectMask, Subresource);

    EmitCs([
      cSrcImage           = pResource->GetImage(),
      cSrcSubresource     = vk::makeSubresourceLayers(subresource),
      cDstBuffer          = pResource->GetMappedBuffer(Subresource),
      cPackedFormat       = pResource->GetPackedFormat()
    ] (DxvkContext* ctx) {
      VkOffset3D offset = { 0, 0, 0 };
      VkExtent3D extent = cSrcImage->mipLevelExtent(cSrcSubresource.mipLevel);

      if (cSrcSubresource.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
        ctx->copyImageToBuffer(cDstBuffer, 0, 0, 0,
          cSrcImage, cSrcSubresource, offset, extent);
      } else {
        ctx->copyDepthStencilImageToPackedBuffer(cDstBuffer, 0,
          VkOffset2D { 0, 0 },
          VkExtent2D { extent.width, extent.height },
          cSrcImage, cSrcSubresource,
          VkOffset2D { 0, 0 },
          VkExtent2D { extent.width, extent.height },
          cPackedFormat);
      }
    });

    if (pResource->HasSequenceNumber())
      TrackTextureSequenceNumber(pResource, Subresource);
  }


  // Records a copy from the mapped buffer to the image for the given
  // region, or for the entire mip level if pRegion is null.
  void D3D11ImmediateContext::UpdateDirtyImageRegion(
          D3D11CommonTexture*         pResource,
          UINT                        Subresource,
    const D3D11_COMMON_TEXTURE_REGION* pRegion) {
    auto formatInfo = lookupFormatInfo(pResource->GetPackedFormat());
    auto subresource = vk::makeSubresourceLayers(
      pResource->GetSubresourceFromIndex(formatInfo->aspectMask, Subresource));

    // Update the entire image if no dirty region was specified
    D3D11_COMMON_TEXTURE_REGION region;

    if (pRegion) {
      region = *pRegion;
    } else {
      region.Offset = VkOffset3D { 0, 0, 0 };
      region.Extent = pResource->MipLevelExtent(subresource.mipLevel);
    }

    auto subresourceLayout = pResource->GetSubresourceLayout(formatInfo->aspectMask, Subresource);

    // Update dirty region one aspect at a time, due to
    // how the data is laid out in the staging buffer.
    for (uint32_t i = 0; i < pResource->GetPlaneCount(); i++) {
      subresource.aspectMask = formatInfo->aspectMask;

      if (formatInfo->flags.test(DxvkFormatFlag::MultiPlane))
        subresource.aspectMask = vk::getPlaneAspect(i);

      EmitCs([
        cDstImage       = pResource->GetImage(),
        cDstSubresource = subresource,
        cDstOffset      = region.Offset,
        cDstExtent      = region.Extent,
        cSrcBuffer      = pResource->GetMappedBuffer(Subresource),
        cSrcOffset      = pResource->ComputeMappedOffset(Subresource, i, region.Offset),
        cSrcRowPitch    = subresourceLayout.RowPitch,
        cSrcDepthPitch  = subresourceLayout.DepthPitch,
        cPackedFormat   = pResource->GetPackedFormat()
      ] (DxvkContext* ctx) {
        if (cDstSubresource.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
          ctx->copyBufferToImage(
            cDstImage, cDstSubresource, cDstOffset, cDstExtent,
            cSrcBuffer, cSrcOffset, cSrcRowPitch, cSrcDepthPitch);
        } else {
          ctx->copyPackedBufferToDepthStencilImage(
            cDstImage, cDstSubresource,
            VkOffset2D { cDstOffset.x, cDstOffset.y },
            VkExtent2D { cDstExtent.width, cDstExtent.height },
            cSrcBuffer, 0,
            VkOffset2D { cDstOffset.x, cDstOffset.y },
            VkExtent2D { cDstExtent.width, cDstExtent.height },
            cPackedFormat);
        }
      });
    }

    if (pResource->HasSequenceNumber())
      TrackTextureSequenceNumber(pResource, Subresource);
  }


  // Copies Length bytes from pSrcData into the buffer's mapped memory
  // at the given offset. Unless D3D11_COPY_NO_OVERWRITE is passed,
  // the buffer's backing slice is replaced first.
  void D3D11ImmediateContext::UpdateMappedBuffer(
          D3D11Buffer*                  pDstBuffer,
          UINT                          Offset,
          UINT                          Length,
    const void*                         pSrcData,
          UINT                          CopyFlags) {
    DxvkBufferSliceHandle slice;

    if (likely(CopyFlags != D3D11_COPY_NO_OVERWRITE)) {
      slice = pDstBuffer->DiscardSlice();

      EmitCs([
        cBuffer      = pDstBuffer->GetBuffer(),
        cBufferSlice = slice
      ] (DxvkContext* ctx) {
        ctx->invalidateBuffer(cBuffer, cBufferSlice);
      });
    } else {
      slice = pDstBuffer->GetMappedSlice();
    }

    std::memcpy(reinterpret_cast<char*>(slice.mapPtr) + Offset, pSrcData, Length);
  }


  // Swaps the active context state object. The current state is saved
  // into the previous state object (created on demand) and the state
  // stored in pState is loaded. Does nothing if pState is null.
  void STDMETHODCALLTYPE D3D11ImmediateContext::SwapDeviceContextState(
          ID3DDeviceContextState*           pState,
          ID3DDeviceContextState**          ppPreviousState) {
    InitReturnPtr(ppPreviousState);

    if (!pState)
      return;

    // Reset all state affected by the current context state
    ResetCommandListState();

    Com<D3D11DeviceContextState, false> oldState = std::move(m_stateObject);
    Com<D3D11DeviceContextState, false> newState = static_cast<D3D11DeviceContextState*>(pState);

    if (oldState == nullptr)
      oldState = new D3D11DeviceContextState(m_parent);

    if (ppPreviousState)
      *ppPreviousState = oldState.ref();

    m_stateObject = newState;

    oldState->SetState(m_state);
    newState->GetState(m_state);

    // Restore all state affected by the new context state
    RestoreCommandListState();
  }


  // Records a barrier that transitions a shared resource from the
  // given external state to the state stored in its own info.
  void D3D11ImmediateContext::Acquire11on12Resource(
          ID3D11Resource*             pResource,
          VkImageLayout               SrcLayout) {
    D3D10DeviceLock lock = LockContext();

    auto texture = GetCommonTexture(pResource);
    auto buffer = GetCommonBuffer(pResource);

    if (buffer) {
      EmitCs([
        cBuffer   = buffer->GetBuffer()
      ] (DxvkContext* ctx) {
        ctx->emitBufferBarrier(cBuffer,
          VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
          VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT,
          cBuffer->info().stages,
          cBuffer->info().access);
      });
    } else if (texture) {
      EmitCs([
        cImage    = texture->GetImage(),
        cLayout   = SrcLayout
      ] (DxvkContext* ctx) {
        ctx->emitImageBarrier(cImage, cLayout,
          VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
          VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT,
          cImage->info().layout,
          cImage->info().stages,
          cImage->info().access);
      });
    }
  }


  // Records a barrier that transitions a shared resource from the
  // state stored in its own info to the given external state.
  void D3D11ImmediateContext::Release11on12Resource(
          ID3D11Resource*             pResource,
          VkImageLayout               DstLayout) {
    D3D10DeviceLock lock = LockContext();

    auto texture = GetCommonTexture(pResource);
    auto buffer = GetCommonBuffer(pResource);

    if (buffer) {
      EmitCs([
        cBuffer   = buffer->GetBuffer()
      ] (DxvkContext* ctx) {
        ctx->emitBufferBarrier(cBuffer,
          cBuffer->info().stages,
          cBuffer->info().access,
          VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
          VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT);
      });
    } else if (texture) {
      EmitCs([
        cImage    = texture->GetImage(),
        cLayout   = DstLayout
      ] (DxvkContext* ctx) {
        ctx->emitImageBarrier(cImage,
          cImage->info().layout,
          cImage->info().stages,
          cImage->info().access,
          cLayout, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
          VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT);
      });
    }
  }


  // Waits for the CS thread to reach the given sequence number. The
  // current chunk is dispatched first if that number lies beyond the
  // last dispatched chunk.
  void D3D11ImmediateContext::SynchronizeCsThread(uint64_t SequenceNumber) {
    D3D10DeviceLock lock = LockContext();

    // Dispatch current chunk so that all commands
    // recorded prior to this function will be run
    if (SequenceNumber > m_csSeqNum)
      FlushCsChunk();

    m_csThread.synchronize(SequenceNumber);
  }


  // Blocks until the device is idle.
  void D3D11ImmediateContext::SynchronizeDevice() {
    m_device->waitForIdle();
  }


  // Records an end-of-frame command on the CS thread.
  void D3D11ImmediateContext::EndFrame() {
    D3D10DeviceLock lock = LockContext();

    EmitCs([] (DxvkContext* ctx) {
      ctx->endFrame();
    });
  }


  // Makes sure the resource is safe to access with the given map type.
  // Returns false only if the resource is still in use and the caller
  // passed D3D11_MAP_FLAG_DO_NOT_WAIT; otherwise blocks until the
  // resource is available and returns true.
  bool D3D11ImmediateContext::WaitForResource(
    const Rc<DxvkResource>&                 Resource,
          uint64_t                          SequenceNumber,
          D3D11_MAP                         MapType,
          UINT                              MapFlags) {
    // Determine access type to wait for based on map mode
    DxvkAccess access = MapType == D3D11_MAP_READ
      ? DxvkAccess::Write
      : DxvkAccess::Read;

    // Wait for any CS chunk using the resource to execute, since
    // otherwise we cannot accurately determine if the resource is
    // actually being used by the GPU right now.
    bool isInUse = Resource->isInUse(access);

    if (!isInUse) {
      SynchronizeCsThread(SequenceNumber);
      isInUse = Resource->isInUse(access);
    }

    if (MapFlags & D3D11_MAP_FLAG_DO_NOT_WAIT) {
      if (isInUse) {
        // We don't have to wait, but misbehaving games may
        // still try to spin on `Map` until the resource is
        // idle, so we should flush pending commands
        ConsiderFlush(GpuFlushType::ImplicitSynchronization);
        return false;
      }
    } else {
      if (isInUse) {
        // Make sure pending commands using the resource get
        // executed on the GPU if we have to wait for it
        ExecuteFlush(GpuFlushType::ImplicitSynchronization, nullptr, false);
        SynchronizeCsThread(SequenceNumber);

        m_device->waitForResource(Resource, access);
      }
    }

    return true;
  }


  // Dispatches a chunk to the CS thread and stores
  // the sequence number returned for it.
  void D3D11ImmediateContext::EmitCsChunk(DxvkCsChunkRef&& chunk) {
    m_csSeqNum = m_csThread.dispatchChunk(std::move(chunk));
  }


  // Tags a texture subresource with the current sequence
  // number and considers flushing the context.
  void D3D11ImmediateContext::TrackTextureSequenceNumber(
          D3D11CommonTexture*         pResource,
          UINT                        Subresource) {
    uint64_t sequenceNumber = GetCurrentSequenceNumber();
    pResource->TrackSequenceNumber(Subresource, sequenceNumber);

    ConsiderFlush(GpuFlushType::ImplicitStrongHint);
  }


  // Tags a buffer with the current sequence
  // number and considers flushing the context.
  void D3D11ImmediateContext::TrackBufferSequenceNumber(
          D3D11Buffer*                pResource) {
    uint64_t sequenceNumber = GetCurrentSequenceNumber();
    pResource->TrackSequenceNumber(sequenceNumber);

    ConsiderFlush(GpuFlushType::ImplicitStrongHint);
  }


  // Returns the sequence number that commands
  // recorded right now will be executed with.
  uint64_t D3D11ImmediateContext::GetCurrentSequenceNumber() {
    // We do not flush empty chunks, so if we are tracking a resource
    // immediately after a flush, we need to use the sequence number
    // of the previously submitted chunk to prevent deadlocks.
    return m_csChunk->empty() ? m_csSeqNum : m_csSeqNum + 1;
  }


  // Returns the number of CS chunks recorded since the last flush.
  uint64_t D3D11ImmediateContext::GetPendingCsChunks() {
    return GetCurrentSequenceNumber() - m_flushSeqNum;
  }


  // Asks the flush tracker whether a flush of the given
  // type should happen now, and executes it if so.
  void D3D11ImmediateContext::ConsiderFlush(
          GpuFlushType                FlushType) {
    uint64_t chunkId = GetCurrentSequenceNumber();
    uint64_t submissionId = m_submissionFence->value();

    if (m_flushTracker.considerFlush(FlushType, chunkId, submissionId))
      ExecuteFlush(FlushType, nullptr, false);
  }


  // Flushes the init context and, if there is pending work or an
  // event to signal, records a fence signal plus a command list flush
  // and dispatches them to the CS thread. On 11on12 devices with
  // Synchronize set, blocks until the queue submission is performed.
  void D3D11ImmediateContext::ExecuteFlush(
          GpuFlushType                FlushType,
          HANDLE                      hEvent,
          BOOL                        Synchronize) {
    bool synchronizeSubmission = Synchronize && m_parent->Is11on12Device();

    if (synchronizeSubmission)
      m_submitStatus.result = VK_NOT_READY;

    // Flush init context so that new resources are fully initialized
    // before the app can access them in any way. This has to happen
    // unconditionally since we may otherwise deadlock on Map.
    m_parent->FlushInitContext();

    // Exit early if there's nothing to do
    if (!GetPendingCsChunks() && !hEvent)
      return;

    // Signal the submission fence and flush the command list
    uint64_t submissionId = ++m_submissionId;

    if (hEvent) {
      m_submissionFence->setCallback(submissionId, [hEvent] {
        SetEvent(hEvent);
      });
    }

    EmitCs([
      cSubmissionFence  = m_submissionFence,
      cSubmissionId     = submissionId,
      cSubmissionStatus = synchronizeSubmission ? &m_submitStatus : nullptr
    ] (DxvkContext* ctx) {
      ctx->signal(cSubmissionFence, cSubmissionId);
      ctx->flushCommandList(cSubmissionStatus);
    });

    FlushCsChunk();

    // Notify flush tracker about the flush
    m_flushSeqNum = m_csSeqNum;
    m_flushTracker.notifyFlush(m_flushSeqNum, submissionId);

    // If necessary, block calling thread until the
    // Vulkan queue submission is performed.
    if (synchronizeSubmission)
      m_device->waitForSubmission(&m_submitStatus);
  }

}