[d3d11] Allocate host-readable images on host-visible memory

Not having to wait for an image->buffer copy to finish on the
GPU allows for more efficient synchronization. Significantly
improves performance in The Witcher 3.
This commit is contained in:
Philip Rebohle 2018-03-10 23:32:15 +01:00
parent a4a8e0d6c8
commit d3e89b20dd
No known key found for this signature in database
GPG Key ID: C8CC613427A31C99
4 changed files with 162 additions and 103 deletions

View File

@ -120,17 +120,8 @@ namespace dxvk {
cPhysicalSlice.resource()->release();
});
} else if (MapType != D3D11_MAP_WRITE_NO_OVERWRITE) {
// Synchronize with CS thread so that we know whether
// the buffer is currently in use by the GPU or not
Flush();
SynchronizeCsThread();
while (buffer->isInUse()) {
if (MapFlags & D3D11_MAP_FLAG_DO_NOT_WAIT)
return DXGI_ERROR_WAS_STILL_DRAWING;
SynchronizeDevice();
}
if (!WaitForResource(buffer->resource(), MapFlags))
return DXGI_ERROR_WAS_STILL_DRAWING;
}
// Use map pointer from previous map operation. This
@ -144,79 +135,103 @@ namespace dxvk {
pMappedResource->DepthPitch = physicalSlice.length();
return S_OK;
} else {
// Mapping an image is sadly not as simple as mapping a buffer
// because applications tend to ignore row and layer strides.
// We use a buffer instead and then perform a copy.
// Depending on whether the image has been allocated on a
// host-visible memory type, we can either use the mapped
// memory region directly, or we map a linear buffer.
D3D11TextureInfo* textureInfo = GetCommonTextureInfo(pResource);
if (textureInfo->imageBuffer == nullptr) {
Logger::err("D3D11: Cannot map a device-local image");
return E_INVALIDARG;
}
if (pMappedResource == nullptr)
return S_FALSE;
// Query format and subresource in order to compute
// the row pitch and layer pitch properly.
const DxvkImageCreateInfo& imageInfo = textureInfo->image->info();
const DxvkFormatInfo* formatInfo = imageFormatInfo(imageInfo.format);
textureInfo->mappedSubresource =
GetSubresourceFromIndex(VK_IMAGE_ASPECT_COLOR_BIT,
imageInfo.mipLevels, Subresource);
textureInfo->mappedSubresource = GetSubresourceFromIndex(
VK_IMAGE_ASPECT_COLOR_BIT, imageInfo.mipLevels, Subresource);
const VkExtent3D levelExtent = textureInfo->image
->mipLevelExtent(textureInfo->mappedSubresource.mipLevel);
const VkExtent3D blockCount = util::computeBlockCount(
levelExtent, formatInfo->blockSize);
DxvkPhysicalBufferSlice physicalSlice;
// When using any map mode which requires the image contents
// to be preserved, copy the image's contents into the buffer.
if (MapType == D3D11_MAP_WRITE_DISCARD) {
physicalSlice = textureInfo->imageBuffer->allocPhysicalSlice();
physicalSlice.resource()->acquire();
if (textureInfo->image->memFlags() & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
const VkSubresourceLayout subresourceLayout =
textureInfo->image->querySubresourceLayout(
textureInfo->mappedSubresource);
EmitCs([
cImageBuffer = textureInfo->imageBuffer,
cPhysicalSlice = physicalSlice
] (DxvkContext* ctx) {
ctx->invalidateBuffer(cImageBuffer, cPhysicalSlice);
cPhysicalSlice.resource()->release();
});
if (!WaitForResource(textureInfo->image, MapFlags))
return DXGI_ERROR_WAS_STILL_DRAWING;
pMappedResource->pData = textureInfo->image->mapPtr(subresourceLayout.offset);
pMappedResource->RowPitch = subresourceLayout.rowPitch;
pMappedResource->DepthPitch = subresourceLayout.rowPitch * imageInfo.extent.height;
if (imageInfo.type == VK_IMAGE_TYPE_3D)
pMappedResource->DepthPitch = subresourceLayout.depthPitch;
else if (imageInfo.numLayers > 1)
pMappedResource->DepthPitch = subresourceLayout.arrayPitch;
return S_OK;
} else {
const VkImageSubresourceLayers subresourceLayers = {
textureInfo->mappedSubresource.aspectMask,
textureInfo->mappedSubresource.mipLevel,
textureInfo->mappedSubresource.arrayLayer, 1 };
if (textureInfo->imageBuffer == nullptr) {
Logger::err("D3D11: Cannot map a device-local image");
return E_INVALIDARG;
}
EmitCs([
cImageBuffer = textureInfo->imageBuffer,
cImage = textureInfo->image,
cSubresources = subresourceLayers,
cLevelExtent = levelExtent
] (DxvkContext* ctx) {
ctx->copyImageToBuffer(
cImageBuffer, 0, VkExtent2D { 0u, 0u },
cImage, cSubresources, VkOffset3D { 0, 0, 0 },
cLevelExtent);
});
if (pMappedResource == nullptr)
return S_FALSE;
Flush();
SynchronizeCsThread();
SynchronizeDevice();
// Query format info in order to compute
// the row pitch and layer pitch properly.
const DxvkFormatInfo* formatInfo = imageFormatInfo(imageInfo.format);
physicalSlice = textureInfo->imageBuffer->slice();
const VkExtent3D levelExtent = textureInfo->image
->mipLevelExtent(textureInfo->mappedSubresource.mipLevel);
const VkExtent3D blockCount = util::computeBlockCount(
levelExtent, formatInfo->blockSize);
DxvkPhysicalBufferSlice physicalSlice;
// When using any map mode which requires the image contents
// to be preserved, copy the image's contents into the buffer.
if (MapType == D3D11_MAP_WRITE_DISCARD) {
physicalSlice = textureInfo->imageBuffer->allocPhysicalSlice();
physicalSlice.resource()->acquire();
EmitCs([
cImageBuffer = textureInfo->imageBuffer,
cPhysicalSlice = physicalSlice
] (DxvkContext* ctx) {
ctx->invalidateBuffer(cImageBuffer, cPhysicalSlice);
cPhysicalSlice.resource()->release();
});
} else {
// We may have to copy the current image contents into the
// mapped buffer if the GPU has write access to the image.
const bool copyExistingData = textureInfo->usage == D3D11_USAGE_STAGING;
if (copyExistingData) {
const VkImageSubresourceLayers subresourceLayers = {
textureInfo->mappedSubresource.aspectMask,
textureInfo->mappedSubresource.mipLevel,
textureInfo->mappedSubresource.arrayLayer, 1 };
EmitCs([
cImageBuffer = textureInfo->imageBuffer,
cImage = textureInfo->image,
cSubresources = subresourceLayers,
cLevelExtent = levelExtent
] (DxvkContext* ctx) {
ctx->copyImageToBuffer(
cImageBuffer, 0, VkExtent2D { 0u, 0u },
cImage, cSubresources, VkOffset3D { 0, 0, 0 },
cLevelExtent);
});
}
if (!WaitForResource(textureInfo->imageBuffer->resource(), MapFlags))
return DXGI_ERROR_WAS_STILL_DRAWING;
physicalSlice = textureInfo->imageBuffer->slice();
}
// Set up map pointer. Data is tightly packed within the mapped buffer.
pMappedResource->pData = physicalSlice.mapPtr(0);
pMappedResource->RowPitch = formatInfo->elementSize * blockCount.width;
pMappedResource->DepthPitch = formatInfo->elementSize * blockCount.width * blockCount.height;
return S_OK;
}
// Set up map pointer. Data is tightly packed within the mapped buffer.
pMappedResource->pData = physicalSlice.mapPtr(0);
pMappedResource->RowPitch = formatInfo->elementSize * blockCount.width;
pMappedResource->DepthPitch = formatInfo->elementSize * blockCount.width * blockCount.height;
return S_OK;
}
}
@ -233,24 +248,26 @@ namespace dxvk {
const D3D11TextureInfo* textureInfo
= GetCommonTextureInfo(pResource);
const VkExtent3D levelExtent = textureInfo->image
->mipLevelExtent(textureInfo->mappedSubresource.mipLevel);
const VkImageSubresourceLayers subresourceLayers = {
textureInfo->mappedSubresource.aspectMask,
textureInfo->mappedSubresource.mipLevel,
textureInfo->mappedSubresource.arrayLayer, 1 };
EmitCs([
cSrcBuffer = textureInfo->imageBuffer,
cDstImage = textureInfo->image,
cDstLayers = subresourceLayers,
cDstLevelExtent = levelExtent
] (DxvkContext* ctx) {
ctx->copyBufferToImage(cDstImage, cDstLayers,
VkOffset3D { 0, 0, 0 }, cDstLevelExtent,
cSrcBuffer, 0, { 0u, 0u });
});
if (!(textureInfo->image->memFlags() & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
const VkExtent3D levelExtent = textureInfo->image
->mipLevelExtent(textureInfo->mappedSubresource.mipLevel);
const VkImageSubresourceLayers subresourceLayers = {
textureInfo->mappedSubresource.aspectMask,
textureInfo->mappedSubresource.mipLevel,
textureInfo->mappedSubresource.arrayLayer, 1 };
EmitCs([
cSrcBuffer = textureInfo->imageBuffer,
cDstImage = textureInfo->image,
cDstLayers = subresourceLayers,
cDstLevelExtent = levelExtent
] (DxvkContext* ctx) {
ctx->copyBufferToImage(cDstImage, cDstLayers,
VkOffset3D { 0, 0, 0 }, cDstLevelExtent,
cSrcBuffer, 0, { 0u, 0u });
});
}
}
}
@ -265,14 +282,29 @@ namespace dxvk {
/**
 * \brief Blocks until the device is idle
 *
 * Calls \c waitForIdle on the underlying device object
 * and does not take any arguments or return a value.
 */
void D3D11ImmediateContext::SynchronizeDevice() {
// FIXME: Waiting until the device has finished executing
// *all* pending commands is too pessimistic. Ideally, we
// would only wait for the individual command submissions
// that we actually depend on to complete. Doing so will
// require changes in the DxvkDevice class.
m_device->waitForIdle();
}
bool D3D11ImmediateContext::WaitForResource(
const Rc<DxvkResource>& Resource,
UINT MapFlags) {
// Wait for the any pending D3D11 command to be executed
// on the CS thread so that we can determine whether the
// resource is currently in use or not.
Flush();
SynchronizeCsThread();
if (Resource->isInUse()) {
// TODO implement properly in DxvkDevice
while (Resource->isInUse())
std::this_thread::yield();
}
return true;
}
/**
 * \brief Submits a command chunk
 *
 * Hands the given chunk over to \c m_csThread by calling
 * \c dispatchChunk on it. The chunk is passed on with
 * \c std::move, so the caller should not rely on its
 * contents afterwards.
 * \param [in] chunk The chunk to dispatch
 */
void D3D11ImmediateContext::EmitCsChunk(Rc<DxvkCsChunk>&& chunk) {
m_csThread.dispatchChunk(std::move(chunk));
}

View File

@ -50,6 +50,10 @@ namespace dxvk {
void SynchronizeDevice();
bool WaitForResource(
const Rc<DxvkResource>& Resource,
UINT MapFlags);
void EmitCsChunk(Rc<DxvkCsChunk>&& chunk) final;
};

View File

@ -22,7 +22,6 @@ namespace dxvk {
return DxgiFormatMode::Any;
}
/**
* \brief Optimizes image layout based on usage flags
*
@ -144,8 +143,10 @@ namespace dxvk {
if (CPUAccessFlags != 0) {
pImageInfo->stages |= VK_PIPELINE_STAGE_HOST_BIT;
if (CPUAccessFlags & D3D11_CPU_ACCESS_WRITE)
if (CPUAccessFlags & D3D11_CPU_ACCESS_WRITE) {
pImageInfo->access |= VK_ACCESS_HOST_WRITE_BIT;
pImageInfo->tiling = VK_IMAGE_TILING_LINEAR;
}
if (CPUAccessFlags & D3D11_CPU_ACCESS_READ)
pImageInfo->access |= VK_ACCESS_HOST_READ_BIT;
@ -159,6 +160,29 @@ namespace dxvk {
}
/**
 * \brief Selects memory properties for an image
 *
 * Images which the host needs to read from are requested
 * on a host-visible, host-coherent and host-cached memory
 * type. All other images are requested on device-local
 * memory.
 * \param [in] CPUAccessFlags D3D11 CPU access flags
 * \returns Memory property flags for the image
 */
static VkMemoryPropertyFlags GetImageMemoryFlags(UINT CPUAccessFlags) {
  const bool hostReadsImage = (CPUAccessFlags & D3D11_CPU_ACCESS_READ) != 0;

  // Without read access, image mapping is emulated through
  // a buffer instead of mapping the image memory directly.
  // Some games ignore the row pitch when mapping images,
  // which would otherwise lead to incorrect rendering.
  if (!hostReadsImage)
    return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;

  return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
       | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
       | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
}
D3D11Texture1D::D3D11Texture1D(
D3D11Device* pDevice,
const D3D11_TEXTURE1D_DESC* pDesc)
@ -201,7 +225,7 @@ namespace dxvk {
// Create the image and, if necessary, the image buffer
m_texInfo.formatMode = formatMode;
m_texInfo.image = pDevice->GetDXVKDevice()->createImage(
info, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
info, GetImageMemoryFlags(m_desc.CPUAccessFlags));
m_texInfo.imageBuffer = m_desc.CPUAccessFlags != 0
? CreateImageBuffer(pDevice->GetDXVKDevice(), info.format, info.extent)
: nullptr;
@ -301,7 +325,7 @@ namespace dxvk {
// Create the image and, if necessary, the image buffer
m_texInfo.formatMode = formatMode;
m_texInfo.image = pDevice->GetDXVKDevice()->createImage(
info, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
info, GetImageMemoryFlags(m_desc.CPUAccessFlags));
m_texInfo.imageBuffer = m_desc.CPUAccessFlags != 0
? CreateImageBuffer(pDevice->GetDXVKDevice(), info.format, info.extent)
: nullptr;
@ -354,7 +378,7 @@ namespace dxvk {
///////////////////////////////////////////
// D 3 D 1 1 T E X T U R E 2 D
// D 3 D 1 1 T E X T U R E 3 D
D3D11Texture3D::D3D11Texture3D(
D3D11Device* pDevice,
const D3D11_TEXTURE3D_DESC* pDesc)
@ -398,7 +422,7 @@ namespace dxvk {
// Create the image and, if necessary, the image buffer
m_texInfo.formatMode = formatMode;
m_texInfo.image = pDevice->GetDXVKDevice()->createImage(
info, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
info, GetImageMemoryFlags(m_desc.CPUAccessFlags));
m_texInfo.imageBuffer = m_desc.CPUAccessFlags != 0
? CreateImageBuffer(pDevice->GetDXVKDevice(), info.format, info.extent)
: nullptr;

View File

@ -28,7 +28,6 @@ namespace dxvk {
};
///////////////////////////////////////////
// D 3 D 1 1 T E X T U R E 1 D
class D3D11Texture1D : public D3D11DeviceChild<ID3D11Texture1D> {