[d3d11] Optimize UpdateSubresource for small buffer updates

Some games use UpdateSubresource to upload constant buffers in
between draws, so this path should be as fast as possible.

Also fixes a potential issue when using D3D11_COPY_NO_OVERWRITE
on deferred contexts, since the Map requirements don't hold here.
This commit is contained in:
Philip Rebohle 2022-02-06 18:20:31 +01:00
parent b746e1352b
commit 6c862b63a2
No known key found for this signature in database
GPG Key ID: C8CC613427A31C99
6 changed files with 290 additions and 159 deletions

View File

@ -903,134 +903,6 @@ namespace dxvk {
});
}
// Plain UpdateSubresource carries no copy flags, so it is expressed
// as UpdateSubresource1 with an empty flag set.
void STDMETHODCALLTYPE D3D11DeviceContext::UpdateSubresource(
        ID3D11Resource*                   pDstResource,
        UINT                              DstSubresource,
  const D3D11_BOX*                        pDstBox,
  const void*                             pSrcData,
        UINT                              SrcRowPitch,
        UINT                              SrcDepthPitch) {
  constexpr UINT noCopyFlags = 0;

  UpdateSubresource1(
    pDstResource, DstSubresource, pDstBox,
    pSrcData, SrcRowPitch, SrcDepthPitch,
    noCopyFlags);
}
// Uploads CPU data (pSrcData) into a buffer or into one texture subresource.
//
// Buffers: pDstBox->left/right select the byte range (the whole buffer slice
// if pDstBox is null). The write goes through Map/Unmap when the backing
// memory is host-visible and either the whole slice is written or a copy
// flag is set; otherwise (or if Map fails) the data is copied into a data
// slice and applied with DxvkContext::updateBuffer on the CS thread.
//
// Textures: the region given by pDstBox (the whole mip level if null) is
// packed into a staging buffer and handed to UpdateImage.
//
// Returns silently on a null resource, an empty or out-of-range region,
// an out-of-range subresource index or a region that is not block-aligned.
void STDMETHODCALLTYPE D3D11DeviceContext::UpdateSubresource1(
ID3D11Resource* pDstResource,
UINT DstSubresource,
const D3D11_BOX* pDstBox,
const void* pSrcData,
UINT SrcRowPitch,
UINT SrcDepthPitch,
UINT CopyFlags) {
// The lock object lives until the end of the function
D3D10DeviceLock lock = LockContext();
if (!pDstResource)
return;
// Filter out invalid copy flags
CopyFlags &= D3D11_COPY_NO_OVERWRITE | D3D11_COPY_DISCARD;
// We need a different code path for buffers
D3D11_RESOURCE_DIMENSION resourceType;
pDstResource->GetType(&resourceType);
if (resourceType == D3D11_RESOURCE_DIMENSION_BUFFER) {
const auto bufferResource = static_cast<D3D11Buffer*>(pDstResource);
const auto bufferSlice = bufferResource->GetBufferSlice();
// Default to the whole slice, narrowed by the box if one is given
VkDeviceSize offset = bufferSlice.offset();
VkDeviceSize size = bufferSlice.length();
if (pDstBox != nullptr) {
offset = pDstBox->left;
size = pDstBox->right - pDstBox->left;
}
// Nothing to do for an empty range; ignore ranges that
// extend past the end of the buffer slice
if (!size || offset + size > bufferSlice.length())
return;
// Mapping is only attempted for host-visible memory, and only when
// the entire slice is written or the app passed a copy flag
bool useMap = (bufferSlice.buffer()->memFlags() & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
&& (size == bufferSlice.length() || CopyFlags);
if (useMap) {
// NO_OVERWRITE wins if set, every other case maps with DISCARD
D3D11_MAP mapType = (CopyFlags & D3D11_COPY_NO_OVERWRITE)
? D3D11_MAP_WRITE_NO_OVERWRITE
: D3D11_MAP_WRITE_DISCARD;
D3D11_MAPPED_SUBRESOURCE mappedSr;
// The assignment inside the condition is intentional: a failed
// Map clears useMap so that the fallback path below runs instead
if (likely(useMap = SUCCEEDED(Map(pDstResource, 0, mapType, 0, &mappedSr)))) {
std::memcpy(reinterpret_cast<char*>(mappedSr.pData) + offset, pSrcData, size);
Unmap(pDstResource, 0);
}
}
if (!useMap) {
// Snapshot the source data into a slice that the CS lambda
// captures, then let the backend perform the buffer update
DxvkDataSlice dataSlice = AllocUpdateBufferSlice(size);
std::memcpy(dataSlice.ptr(), pSrcData, size);
EmitCs([
cDataBuffer = std::move(dataSlice),
cBufferSlice = bufferSlice.subSlice(offset, size)
] (DxvkContext* ctx) {
ctx->updateBuffer(
cBufferSlice.buffer(),
cBufferSlice.offset(),
cBufferSlice.length(),
cDataBuffer.ptr());
});
}
} else {
D3D11CommonTexture* dstTexture = GetCommonTexture(pDstResource);
if (DstSubresource >= dstTexture->CountSubresources())
return;
VkFormat packedFormat = dstTexture->GetPackedFormat();
auto formatInfo = imageFormatInfo(packedFormat);
auto subresource = dstTexture->GetSubresourceFromIndex(
formatInfo->aspectMask, DstSubresource);
// The update region defaults to the full mip level
VkExtent3D mipExtent = dstTexture->MipLevelExtent(subresource.mipLevel);
VkOffset3D offset = { 0, 0, 0 };
VkExtent3D extent = mipExtent;
if (pDstBox != nullptr) {
if (pDstBox->left >= pDstBox->right
|| pDstBox->top >= pDstBox->bottom
|| pDstBox->front >= pDstBox->back)
return; // no-op, but legal
offset.x = pDstBox->left;
offset.y = pDstBox->top;
offset.z = pDstBox->front;
extent.width = pDstBox->right - pDstBox->left;
extent.height = pDstBox->bottom - pDstBox->top;
extent.depth = pDstBox->back - pDstBox->front;
}
// The region is checked against the format's block size
// and the mip extent before any data is copied
if (!util::isBlockAligned(offset, extent, formatInfo->blockSize, mipExtent)) {
Logger::err("D3D11: UpdateSubresource1: Unaligned region");
return;
}
// Pack the application data into a staging buffer using the
// app-provided pitches, then hand the upload to UpdateImage
auto stagingSlice = AllocStagingBuffer(util::computeImageDataSize(packedFormat, extent));
util::packImageData(stagingSlice.mapPtr(0),
pSrcData, SrcRowPitch, SrcDepthPitch, 0, 0,
dstTexture->GetVkImageType(), extent, 1,
formatInfo, formatInfo->aspectMask);
UpdateImage(dstTexture, &subresource,
offset, extent, std::move(stagingSlice));
}
}
HRESULT STDMETHODCALLTYPE D3D11DeviceContext::UpdateTileMappings(
ID3D11Resource* pTiledResource,
@ -3668,6 +3540,102 @@ namespace dxvk {
}
// Writes Length bytes from pSrcData into pDstBuffer at byte offset Offset
// by recording a command on the CS thread. The source data is copied before
// this function returns, either into a data slice or into a staging buffer.
void D3D11DeviceContext::UpdateBuffer(
        D3D11Buffer*                      pDstBuffer,
        UINT                              Offset,
        UINT                              Length,
  const void*                             pSrcData) {
  // Largest update that is routed through DxvkContext::updateBuffer
  constexpr UINT MaxInlineUpdateSize = 65536;

  DxvkBufferSlice dstSlice = pDstBuffer->GetBufferSlice(Offset, Length);

  if (Length > MaxInlineUpdateSize) {
    // Large update: fill a staging buffer right away so that the CS
    // thread only has to record a buffer-to-buffer copy
    DxvkBufferSlice srcSlice = AllocStagingBuffer(Length);
    std::memcpy(srcSlice.mapPtr(0), pSrcData, Length);

    EmitCs([
      cSrcSlice = std::move(srcSlice),
      cDstSlice = std::move(dstSlice)
    ] (DxvkContext* ctx) {
      ctx->copyBuffer(
        cDstSlice.buffer(), cDstSlice.offset(),
        cSrcSlice.buffer(), cSrcSlice.offset(),
        cDstSlice.length());
    });

    return;
  }

  // Small update: the backend has dedicated code paths for these,
  // so just hand it a private copy of the data
  DxvkDataSlice payload = AllocUpdateBufferSlice(Length);
  std::memcpy(payload.ptr(), pSrcData, Length);

  EmitCs([
    cPayload  = std::move(payload),
    cDstSlice = std::move(dstSlice)
  ] (DxvkContext* ctx) {
    ctx->updateBuffer(
      cDstSlice.buffer(), cDstSlice.offset(),
      cDstSlice.length(), cPayload.ptr());
  });
}
// Uploads CPU data into one subresource of a texture. The destination region
// is pDstBox, or the entire mip level if pDstBox is null. Out-of-range
// subresource indices and empty boxes are ignored; a region that is not
// block-aligned is rejected with an error message.
void D3D11DeviceContext::UpdateTexture(
        D3D11CommonTexture*               pDstTexture,
        UINT                              DstSubresource,
  const D3D11_BOX*                        pDstBox,
  const void*                             pSrcData,
        UINT                              SrcRowPitch,
        UINT                              SrcDepthPitch) {
  if (DstSubresource >= pDstTexture->CountSubresources())
    return;

  VkFormat format = pDstTexture->GetPackedFormat();
  auto info = imageFormatInfo(format);
  auto subresource = pDstTexture->GetSubresourceFromIndex(
    info->aspectMask, DstSubresource);

  // Start out with the whole mip level as the target region
  VkExtent3D levelExtent = pDstTexture->MipLevelExtent(subresource.mipLevel);
  VkOffset3D regionOffset = { 0, 0, 0 };
  VkExtent3D regionExtent = levelExtent;

  if (pDstBox) {
    const bool hasVolume = pDstBox->left  < pDstBox->right
                        && pDstBox->top   < pDstBox->bottom
                        && pDstBox->front < pDstBox->back;

    // An empty box is legal and simply updates nothing
    if (!hasVolume)
      return;

    regionOffset.x = pDstBox->left;
    regionOffset.y = pDstBox->top;
    regionOffset.z = pDstBox->front;

    regionExtent.width  = pDstBox->right  - pDstBox->left;
    regionExtent.height = pDstBox->bottom - pDstBox->top;
    regionExtent.depth  = pDstBox->back   - pDstBox->front;
  }

  if (!util::isBlockAligned(regionOffset, regionExtent, info->blockSize, levelExtent)) {
    Logger::err("D3D11: UpdateSubresource1: Unaligned region");
    return;
  }

  // Repack the application data tightly into a staging buffer,
  // then let UpdateImage take care of the actual upload
  auto staging = AllocStagingBuffer(
    util::computeImageDataSize(format, regionExtent));

  util::packImageData(
    staging.mapPtr(0), pSrcData,
    SrcRowPitch, SrcDepthPitch, 0, 0,
    pDstTexture->GetVkImageType(),
    regionExtent, 1,
    info, info->aspectMask);

  UpdateImage(pDstTexture, &subresource,
    regionOffset, regionExtent, std::move(staging));
}
void D3D11DeviceContext::UpdateImage(
D3D11CommonTexture* pDstTexture,
const VkImageSubresource* pDstSubresource,

View File

@ -133,23 +133,6 @@ namespace dxvk {
// D3D11 API entry point; the implementation is not part of this excerpt.
void STDMETHODCALLTYPE GenerateMips(
ID3D11ShaderResourceView* pShaderResourceView);
// Uploads CPU data into a buffer or texture subresource. The visible
// D3D11DeviceContext implementation forwards to UpdateSubresource1
// with CopyFlags set to 0.
void STDMETHODCALLTYPE UpdateSubresource(
ID3D11Resource* pDstResource,
UINT DstSubresource,
const D3D11_BOX* pDstBox,
const void* pSrcData,
UINT SrcRowPitch,
UINT SrcDepthPitch);
// Same as UpdateSubresource, with additional copy flags. The visible
// implementation masks CopyFlags down to D3D11_COPY_NO_OVERWRITE and
// D3D11_COPY_DISCARD and silently ignores a null pDstResource.
void STDMETHODCALLTYPE UpdateSubresource1(
ID3D11Resource* pDstResource,
UINT DstSubresource,
const D3D11_BOX* pDstBox,
const void* pSrcData,
UINT SrcRowPitch,
UINT SrcDepthPitch,
UINT CopyFlags);
HRESULT STDMETHODCALLTYPE UpdateTileMappings(
ID3D11Resource* pTiledResource,
UINT NumTiledResourceRegions,
@ -807,6 +790,82 @@ namespace dxvk {
ID3D11Resource* pResource,
UINT Subresource);
/**
 * \brief Common implementation of UpdateSubresource and UpdateSubresource1
 *
 * Takes the context lock, then dispatches to the context's
 * \c UpdateMappedBuffer, \c UpdateBuffer or \c UpdateTexture
 * method depending on the resource type and its map mode.
 * A null resource, an empty or inverted buffer box, and a
 * buffer range that exceeds the buffer size are all no-ops.
 * \tparam ContextType Concrete context class to dispatch to
 * \param [in] pContext Context that performs the update
 * \param [in] pDstResource Destination resource, may be null
 * \param [in] DstSubresource Subresource index, textures only
 * \param [in] pDstBox Destination region, or null to update
 *    the whole resource. Buffers only use \c left and \c right.
 * \param [in] pSrcData Source data
 * \param [in] SrcRowPitch Source row pitch, textures only
 * \param [in] SrcDepthPitch Source depth pitch, textures only
 * \param [in] CopyFlags Copy flags. Only \c D3D11_COPY_DISCARD
 *    and \c D3D11_COPY_NO_OVERWRITE are taken into account.
 */
template<typename ContextType>
static void UpdateResource(
        ContextType*                      pContext,
        ID3D11Resource*                   pDstResource,
        UINT                              DstSubresource,
  const D3D11_BOX*                        pDstBox,
  const void*                             pSrcData,
        UINT                              SrcRowPitch,
        UINT                              SrcDepthPitch,
        UINT                              CopyFlags) {
  D3D10DeviceLock lock = pContext->LockContext();

  if (!pDstResource)
    return;

  // We need a different code path for buffers
  D3D11_RESOURCE_DIMENSION resourceType;
  pDstResource->GetType(&resourceType);

  if (likely(resourceType == D3D11_RESOURCE_DIMENSION_BUFFER)) {
    const auto bufferResource = static_cast<D3D11Buffer*>(pDstResource);
    uint64_t bufferSize = bufferResource->Desc()->ByteWidth;

    // Provide a fast path for mapped buffer updates since some
    // games use UpdateSubresource to update constant buffers.
    if (likely(bufferResource->GetMapMode() == D3D11_COMMON_BUFFER_MAP_MODE_DIRECT) && likely(!pDstBox)) {
      pContext->UpdateMappedBuffer(bufferResource, 0, bufferSize, pSrcData, 0);
      return;
    }

    // Validate buffer range to update
    uint64_t offset = 0;
    uint64_t length = bufferSize;

    if (pDstBox) {
      // An empty box is a legal no-op. An inverted box must be rejected
      // here as well: the 64-bit subtraction below would wrap around,
      // offset + length would wrap back to pDstBox->right and pass the
      // range check, and the wrapped length would then be truncated to
      // 32 bits and used as the size of a memcpy.
      if (unlikely(pDstBox->left >= pDstBox->right))
        return;

      offset = pDstBox->left;
      length = pDstBox->right - offset;
    }

    // From here on, offset and length are known to fit into 32 bits
    if (unlikely(offset + length > bufferSize))
      return;

    // Still try to be fast if a box is provided but we update the full buffer
    if (likely(bufferResource->GetMapMode() == D3D11_COMMON_BUFFER_MAP_MODE_DIRECT)) {
      CopyFlags &= D3D11_COPY_DISCARD | D3D11_COPY_NO_OVERWRITE;

      if (likely(length == bufferSize) || unlikely(CopyFlags != 0)) {
        pContext->UpdateMappedBuffer(bufferResource, offset, length, pSrcData, CopyFlags);
        return;
      }
    }

    // Otherwise we can't really do anything fancy, so just do a GPU copy
    pContext->UpdateBuffer(bufferResource, offset, length, pSrcData);
  } else {
    D3D11CommonTexture* textureResource = GetCommonTexture(pDstResource);

    pContext->UpdateTexture(textureResource,
      DstSubresource, pDstBox, pSrcData, SrcRowPitch, SrcDepthPitch);
  }
}
// Copies Length bytes from pSrcData into pDstBuffer at byte offset Offset
// through a command emitted to the CS thread. Updates of up to 65536 bytes
// use DxvkContext::updateBuffer, larger ones go through a staging buffer
// and DxvkContext::copyBuffer.
void UpdateBuffer(
D3D11Buffer* pDstBuffer,
UINT Offset,
UINT Length,
const void* pSrcData);
// Uploads pSrcData into the region pDstBox of the given texture subresource
// (the whole mip level if pDstBox is null). Invalid subresource indices and
// empty boxes are ignored, regions that are not block-aligned are rejected.
void UpdateTexture(
D3D11CommonTexture* pDstTexture,
UINT DstSubresource,
const D3D11_BOX* pDstBox,
const void* pSrcData,
UINT SrcRowPitch,
UINT SrcDepthPitch);
void UpdateImage(
D3D11CommonTexture* pDstTexture,
const VkImageSubresource* pDstSubresource,
@ -921,7 +980,7 @@ namespace dxvk {
UINT NumViews,
ID3D11RenderTargetView* const* ppRenderTargetViews,
ID3D11DepthStencilView* pDepthStencilView);
// Builds a VkClearValue from a four-component float color.
// NOTE(review): the definition is not visible here; presumably
// pFormatInfo selects how the components are converted - confirm.
VkClearValue ConvertColorValue(
const FLOAT Color[4],
const DxvkFormatInfo* pFormatInfo);

View File

@ -246,6 +246,31 @@ namespace dxvk {
}
// Deferred-context entry point. Routes into the shared UpdateResource
// helper, instantiated for this context type, with no copy flags.
void STDMETHODCALLTYPE D3D11DeferredContext::UpdateSubresource(
        ID3D11Resource*                   pDstResource,
        UINT                              DstSubresource,
  const D3D11_BOX*                        pDstBox,
  const void*                             pSrcData,
        UINT                              SrcRowPitch,
        UINT                              SrcDepthPitch) {
  constexpr UINT noCopyFlags = 0;

  UpdateResource<D3D11DeferredContext>(
    this, pDstResource, DstSubresource, pDstBox,
    pSrcData, SrcRowPitch, SrcDepthPitch,
    noCopyFlags);
}
// Deferred-context entry point. Routes into the shared UpdateResource
// helper, instantiated for this context type, and passes the copy
// flags through unmodified.
void STDMETHODCALLTYPE D3D11DeferredContext::UpdateSubresource1(
        ID3D11Resource*                   pDstResource,
        UINT                              DstSubresource,
  const D3D11_BOX*                        pDstBox,
  const void*                             pSrcData,
        UINT                              SrcRowPitch,
        UINT                              SrcDepthPitch,
        UINT                              CopyFlags) {
  UpdateResource<D3D11DeferredContext>(
    this, pDstResource, DstSubresource, pDstBox,
    pSrcData, SrcRowPitch, SrcDepthPitch,
    CopyFlags);
}
void STDMETHODCALLTYPE D3D11DeferredContext::SwapDeviceContextState(
ID3DDeviceContextState* pState,
ID3DDeviceContextState** ppPreviousState) {
@ -342,6 +367,35 @@ namespace dxvk {
}
// Copies Length bytes from pSrcData to byte offset Offset of the buffer's
// mapped storage. With D3D11_COPY_NO_OVERWRITE, the map entry already
// recorded for this buffer on this context is reused if one exists; in
// every other case the buffer is mapped anew and the new entry recorded.
void D3D11DeferredContext::UpdateMappedBuffer(
        D3D11Buffer*                      pDstBuffer,
        UINT                              Offset,
        UINT                              Length,
  const void*                             pSrcData,
        UINT                              CopyFlags) {
  void* dstBase = nullptr;

  if (unlikely(CopyFlags == D3D11_COPY_NO_OVERWRITE)) {
    auto existing = FindMapEntry(pDstBuffer, 0);

    if (existing != m_mappedResources.rend())
      dstBase = existing->MapPointer;
  }

  if (likely(dstBase == nullptr)) {
    // MapBuffer's result is deliberately not checked, the caller
    // has already verified that the buffer's map mode allows this
    D3D11DeferredContextMapEntry fresh;
    MapBuffer(pDstBuffer, &fresh);

    dstBase = fresh.MapPointer;
    m_mappedResources.push_back(std::move(fresh));
  }

  char* dstBytes = static_cast<char*>(dstBase) + Offset;
  std::memcpy(dstBytes, pSrcData, Length);
}
void D3D11DeferredContext::FinalizeQueries() {
for (auto& query : m_queriesBegun) {
m_commandList->AddQuery(query.ptr());

View File

@ -19,7 +19,7 @@ namespace dxvk {
};
class D3D11DeferredContext : public D3D11DeviceContext {
friend class D3D11DeviceContext;
public:
D3D11DeferredContext(
@ -76,6 +76,23 @@ namespace dxvk {
ID3D11Resource* pResource,
UINT Subresource);
// Deferred-context version of UpdateSubresource. The visible definition
// calls UpdateResource<D3D11DeferredContext> with CopyFlags set to 0.
void STDMETHODCALLTYPE UpdateSubresource(
ID3D11Resource* pDstResource,
UINT DstSubresource,
const D3D11_BOX* pDstBox,
const void* pSrcData,
UINT SrcRowPitch,
UINT SrcDepthPitch);
// Deferred-context version of UpdateSubresource1. The visible definition
// calls UpdateResource<D3D11DeferredContext> and passes CopyFlags through.
void STDMETHODCALLTYPE UpdateSubresource1(
ID3D11Resource* pDstResource,
UINT DstSubresource,
const D3D11_BOX* pDstBox,
const void* pSrcData,
UINT SrcRowPitch,
UINT SrcDepthPitch,
UINT CopyFlags);
// D3D11 API entry point; the body is not part of this excerpt.
void STDMETHODCALLTYPE SwapDeviceContextState(
ID3DDeviceContextState* pState,
ID3DDeviceContextState** ppPreviousState);
@ -103,7 +120,14 @@ namespace dxvk {
ID3D11Resource* pResource,
UINT Subresource,
D3D11DeferredContextMapEntry* pMapEntry);
// Copies Length bytes from pSrcData to byte offset Offset of the buffer's
// mapped storage. With D3D11_COPY_NO_OVERWRITE an existing map entry for
// the buffer is reused if there is one; otherwise the buffer is mapped
// again and the new entry is added to m_mappedResources.
void UpdateMappedBuffer(
D3D11Buffer* pDstBuffer,
UINT Offset,
UINT Length,
const void* pSrcData,
UINT CopyFlags);
// Iterates over m_queriesBegun and adds each query to the command
// list; the rest of the definition lies outside this excerpt.
void FinalizeQueries();
// NOTE(review): definition not visible here; going by name and return
// type this creates the D3D11CommandList object - confirm.
Com<D3D11CommandList> CreateCommandList();

View File

@ -288,11 +288,8 @@ namespace dxvk {
const void* pSrcData,
UINT SrcRowPitch,
UINT SrcDepthPitch) {
FlushImplicit(FALSE);
D3D11DeviceContext::UpdateSubresource(
pDstResource, DstSubresource, pDstBox,
pSrcData, SrcRowPitch, SrcDepthPitch);
UpdateResource<D3D11ImmediateContext>(this, pDstResource,
DstSubresource, pDstBox, pSrcData, SrcRowPitch, SrcDepthPitch, 0);
}
@ -304,12 +301,8 @@ namespace dxvk {
UINT SrcRowPitch,
UINT SrcDepthPitch,
UINT CopyFlags) {
FlushImplicit(FALSE);
D3D11DeviceContext::UpdateSubresource1(
pDstResource, DstSubresource, pDstBox,
pSrcData, SrcRowPitch, SrcDepthPitch,
CopyFlags);
UpdateResource<D3D11ImmediateContext>(this, pDstResource,
DstSubresource, pDstBox, pSrcData, SrcRowPitch, SrcDepthPitch, CopyFlags);
}
@ -497,6 +490,31 @@ namespace dxvk {
}
// Copies Length bytes from pSrcData to byte offset Offset of a mapped
// buffer slice. With D3D11_COPY_NO_OVERWRITE the currently mapped slice
// is written in place; in every other case a new slice is obtained from
// DiscardSlice and the CS thread is told to invalidate the buffer.
void D3D11ImmediateContext::UpdateMappedBuffer(
        D3D11Buffer*                      pDstBuffer,
        UINT                              Offset,
        UINT                              Length,
  const void*                             pSrcData,
        UINT                              CopyFlags) {
  DxvkBufferSliceHandle target;

  if (unlikely(CopyFlags == D3D11_COPY_NO_OVERWRITE)) {
    target = pDstBuffer->GetMappedSlice();
  } else {
    target = pDstBuffer->DiscardSlice();

    EmitCs([
      cDstBuffer = pDstBuffer->GetBuffer(),
      cNewSlice  = target
    ] (DxvkContext* ctx) {
      ctx->invalidateBuffer(cDstBuffer, cNewSlice);
    });
  }

  char* dstBytes = reinterpret_cast<char*>(target.mapPtr) + Offset;
  std::memcpy(dstBytes, pSrcData, Length);
}
void STDMETHODCALLTYPE D3D11ImmediateContext::SwapDeviceContextState(
ID3DDeviceContextState* pState,
ID3DDeviceContextState** ppPreviousState) {

View File

@ -16,6 +16,7 @@ namespace dxvk {
class D3D11ImmediateContext : public D3D11DeviceContext {
friend class D3D11SwapChain;
friend class D3D11VideoContext;
friend class D3D11DeviceContext;
public:
D3D11ImmediateContext(
@ -144,6 +145,13 @@ namespace dxvk {
D3D11CommonTexture* pResource,
UINT Subresource);
// Copies Length bytes from pSrcData to byte offset Offset of a mapped
// buffer slice. Unless CopyFlags equals D3D11_COPY_NO_OVERWRITE, the
// buffer is given a new slice via DiscardSlice and an invalidateBuffer
// command is emitted to the CS thread before the data is written.
void UpdateMappedBuffer(
D3D11Buffer* pDstBuffer,
UINT Offset,
UINT Length,
const void* pSrcData,
UINT CopyFlags);
// NOTE(review): definition not visible in this excerpt.
void SynchronizeDevice();
bool WaitForResource(