[dxvk] Emit smarter post-renderpass barriers

Allows folding the old post-renderpass barrier into the same pipeline
barrier call as layout transitions from prepareImage or from starting
another render pass, thus reducing the overall number of pipeline
barriers. Also no longer uses VK_PIPELINE_STAGE_ALL_COMMANDS_BIT.
This commit is contained in:
Philip Rebohle 2022-07-04 14:59:03 +02:00
parent 67d35ecc8d
commit 0c5773dbb9
No known key found for this signature in database
GPG Key ID: C8CC613427A31C99
4 changed files with 130 additions and 73 deletions

View File

@ -333,6 +333,14 @@ namespace dxvk {
m_list.clear();
}
/**
 * \brief Tests the set for emptiness
 *
 * \returns \c true when no entries are stored
 */
bool empty() const {
  return !m_used;
}
private:
struct ListEntry {
@ -538,6 +546,11 @@ namespace dxvk {
void reset();
/**
 * \brief Checks for pending resource barriers
 *
 * \returns \c true if any buffer or image
 *    barriers have been queued up
 */
bool hasResourceBarriers() const {
  return !(m_bufSlices.empty() && m_imgSlices.empty());
}
static DxvkAccessFlags getAccessTypes(VkAccessFlags flags);
private:

View File

@ -16,7 +16,6 @@ namespace dxvk {
m_initBarriers(DxvkCmdBuffer::InitBuffer),
m_execAcquires(DxvkCmdBuffer::ExecBuffer),
m_execBarriers(DxvkCmdBuffer::ExecBuffer),
m_gfxBarriers (DxvkCmdBuffer::ExecBuffer),
m_queryManager(m_common->queryPool()),
m_staging (device, StagingBufferSize) {
// Init framebuffer info with default render pass in case
@ -37,6 +36,33 @@ namespace dxvk {
}
m_descriptorManager = new DxvkDescriptorManager(device.ptr(), type);
// Default destination barriers for graphics pipelines
m_globalRoGraphicsBarrier.stages = m_device->getShaderPipelineStages()
| VK_PIPELINE_STAGE_TRANSFER_BIT
| VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
| VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT
| VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
m_globalRoGraphicsBarrier.access = 0;
if (m_device->features().extTransformFeedback.transformFeedback)
m_globalRoGraphicsBarrier.stages |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
m_globalRwGraphicsBarrier = m_globalRoGraphicsBarrier;
m_globalRwGraphicsBarrier.stages |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT
| VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
m_globalRwGraphicsBarrier.access |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT
| VK_ACCESS_INDEX_READ_BIT
| VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
| VK_ACCESS_UNIFORM_READ_BIT
| VK_ACCESS_SHADER_READ_BIT
| VK_ACCESS_COLOR_ATTACHMENT_READ_BIT
| VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT
| VK_ACCESS_TRANSFER_READ_BIT;
if (m_device->features().extTransformFeedback.transformFeedback)
m_globalRwGraphicsBarrier.access |= VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT;
}
@ -4020,15 +4046,15 @@ namespace dxvk {
if (suspend)
m_flags.set(DxvkContextFlag::GpRenderPassSuspended);
else
this->transitionRenderTargetLayouts(m_gfxBarriers, false);
this->transitionRenderTargetLayouts(m_execBarriers, false);
m_gfxBarriers.recordCommands(m_cmd);
m_execBarriers.recordCommands(m_cmd);
} else if (!suspend) {
// We may end a previously suspended render pass
if (m_flags.test(DxvkContextFlag::GpRenderPassSuspended)) {
m_flags.clr(DxvkContextFlag::GpRenderPassSuspended);
this->transitionRenderTargetLayouts(m_gfxBarriers, false);
m_gfxBarriers.recordCommands(m_cmd);
this->transitionRenderTargetLayouts(m_execBarriers, false);
m_execBarriers.recordCommands(m_cmd);
}
// Execute deferred clears if necessary
@ -4123,42 +4149,59 @@ namespace dxvk {
const auto& depthAttachment = framebufferInfo.getDepthTarget();
if (depthAttachment.view != nullptr) {
m_execBarriers.accessImage(
depthAttachment.view->image(),
depthAttachment.view->imageSubresources(),
depthAttachment.layout,
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
ops.depthOps.storeLayout,
depthAttachment.view->imageInfo().stages,
depthAttachment.view->imageInfo().access);
if (depthAttachment.layout != ops.depthOps.storeLayout) {
m_execBarriers.accessImage(
depthAttachment.view->image(),
depthAttachment.view->imageSubresources(),
depthAttachment.layout,
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
ops.depthOps.storeLayout,
depthAttachment.view->imageInfo().stages,
depthAttachment.view->imageInfo().access);
} else {
VkAccessFlags srcAccess = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
if (depthAttachment.layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL)
srcAccess |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
m_execBarriers.accessMemory(
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
srcAccess,
depthAttachment.view->imageInfo().stages,
depthAttachment.view->imageInfo().access);
}
}
for (uint32_t i = 0; i < MaxNumRenderTargets; i++) {
const auto& colorAttachment = framebufferInfo.getColorTarget(i);
if (colorAttachment.view != nullptr) {
m_execBarriers.accessImage(
colorAttachment.view->image(),
colorAttachment.view->imageSubresources(),
colorAttachment.layout,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
ops.colorOps[i].storeLayout,
colorAttachment.view->imageInfo().stages,
colorAttachment.view->imageInfo().access);
if (colorAttachment.layout != ops.colorOps[i].storeLayout) {
m_execBarriers.accessImage(
colorAttachment.view->image(),
colorAttachment.view->imageSubresources(),
colorAttachment.layout,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
ops.colorOps[i].storeLayout,
colorAttachment.view->imageInfo().stages,
colorAttachment.view->imageInfo().access);
} else {
m_execBarriers.accessMemory(
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
colorAttachment.view->imageInfo().stages,
colorAttachment.view->imageInfo().access);
}
}
}
m_execBarriers.accessMemory(
ops.barrier.srcStages,
ops.barrier.srcAccess,
ops.barrier.dstStages,
ops.barrier.dstAccess);
// Do not flush barriers here. This is intended since
// we pre-record them when binding the framebuffer.
}
@ -4252,26 +4295,21 @@ namespace dxvk {
// Ends the current render pass instance and flushes any barriers
// that must execute outside of it.
void DxvkContext::renderPassUnbindFramebuffer() {
// Terminate the dynamic rendering instance for the bound framebuffer.
m_cmd->cmdEndRendering();
// NOTE(review): this span comes from a unified diff whose +/- markers
// were stripped. The unconditional recordCommands call directly below
// is the pre-change code, and the conditional flush that follows is
// its replacement — only one of the two belongs in the actual source.
// TODO Try to get rid of this for performance reasons.
// This only exists to emulate render pass barriers.
m_execBarriers.recordCommands(m_cmd);
// If there are pending layout transitions, execute them immediately
// since the backend expects images to be in the store layout after
// a render pass instance. This is expected to be rare.
if (m_execBarriers.hasResourceBarriers())
m_execBarriers.recordCommands(m_cmd);
}
void DxvkContext::resetRenderPassOps(
const DxvkRenderTargets& renderTargets,
DxvkRenderPassOps& renderPassOps) {
VkAccessFlags access = 0;
if (renderTargets.depth.view != nullptr) {
renderPassOps.depthOps = DxvkDepthAttachmentOps {
VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_LOAD_OP_LOAD,
renderTargets.depth.layout, renderTargets.depth.layout };
access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
if (renderTargets.depth.layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL)
access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
} else {
renderPassOps.depthOps = DxvkDepthAttachmentOps { };
}
@ -4282,18 +4320,10 @@ namespace dxvk {
VK_ATTACHMENT_LOAD_OP_LOAD,
renderTargets.color[i].layout,
renderTargets.color[i].layout };
access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT
| VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
} else {
renderPassOps.colorOps[i] = DxvkColorAttachmentOps { };
}
}
renderPassOps.barrier.srcStages = VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT;
renderPassOps.barrier.srcAccess = access;
renderPassOps.barrier.dstStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
renderPassOps.barrier.dstAccess = access;
}
@ -4447,7 +4477,7 @@ namespace dxvk {
}
bool DxvkContext::updateGraphicsPipelineState() {
bool DxvkContext::updateGraphicsPipelineState(DxvkGlobalPipelineBarrier srcBarrier) {
// Set up vertex buffer strides for active bindings
for (uint32_t i = 0; i < m_state.gp.state.il.bindingCount(); i++) {
const uint32_t binding = m_state.gp.state.ilBindings[i].binding();
@ -4487,6 +4517,21 @@ namespace dxvk {
VK_PIPELINE_BIND_POINT_GRAPHICS,
pipeline);
// Emit barrier based on pipeline properties, in order to avoid
// accidental write-after-read hazards after the render pass.
DxvkGlobalPipelineBarrier pipelineBarrier = m_state.gp.pipeline->getGlobalBarrier(m_state.gp.state);
srcBarrier.stages |= pipelineBarrier.stages;
srcBarrier.access |= pipelineBarrier.access;
DxvkAccessFlags access = DxvkBarrierSet::getAccessTypes(srcBarrier.access);
DxvkGlobalPipelineBarrier dstBarrier = access.test(DxvkAccess::Write)
? m_globalRwGraphicsBarrier
: m_globalRoGraphicsBarrier;
m_execBarriers.accessMemory(
srcBarrier.stages, srcBarrier.access,
dstBarrier.stages, dstBarrier.access);
m_flags.clr(DxvkContextFlag::GpDirtyPipelineState);
return true;
}
@ -5148,7 +5193,19 @@ namespace dxvk {
this->updateGraphicsShaderResources();
if (m_flags.test(DxvkContextFlag::GpDirtyPipelineState)) {
if (unlikely(!this->updateGraphicsPipelineState()))
DxvkGlobalPipelineBarrier barrier = { };
if (Indexed) {
barrier.stages |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
barrier.access |= VK_ACCESS_INDEX_READ_BIT;
}
if (Indirect) {
barrier.stages |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
barrier.access |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
}
if (unlikely(!this->updateGraphicsPipelineState(barrier)))
return false;
}
@ -5462,14 +5519,14 @@ namespace dxvk {
VkPipelineStageFlags stages,
VkAccessFlags access) {
if constexpr (DoEmit) {
m_gfxBarriers.accessBuffer(
m_execBarriers.accessBuffer(
slice.getSliceHandle(),
stages, access,
slice.bufferInfo().stages,
slice.bufferInfo().access);
return DxvkAccessFlags();
} else {
return m_gfxBarriers.getBufferAccess(slice.getSliceHandle());
return m_execBarriers.getBufferAccess(slice.getSliceHandle());
}
}
@ -5480,7 +5537,7 @@ namespace dxvk {
VkPipelineStageFlags stages,
VkAccessFlags access) {
if constexpr (DoEmit) {
m_gfxBarriers.accessImage(
m_execBarriers.accessImage(
imageView->image(),
imageView->imageSubresources(),
imageView->imageInfo().layout,
@ -5490,7 +5547,7 @@ namespace dxvk {
imageView->imageInfo().access);
return DxvkAccessFlags();
} else {
return m_gfxBarriers.getImageAccess(
return m_execBarriers.getImageAccess(
imageView->image(),
imageView->imageSubresources());
}

View File

@ -1137,12 +1137,14 @@ namespace dxvk {
DxvkBarrierSet m_initBarriers;
DxvkBarrierSet m_execAcquires;
DxvkBarrierSet m_execBarriers;
DxvkBarrierSet m_gfxBarriers;
DxvkBarrierControlFlags m_barrierControl;
DxvkGpuQueryManager m_queryManager;
DxvkStagingBuffer m_staging;
DxvkGlobalPipelineBarrier m_globalRoGraphicsBarrier;
DxvkGlobalPipelineBarrier m_globalRwGraphicsBarrier;
DxvkRenderTargetLayouts m_rtLayouts = { };
DxvkBindingSet<MaxNumVertexBindings + 1> m_vbTracked;
@ -1314,7 +1316,7 @@ namespace dxvk {
void unbindGraphicsPipeline();
bool updateGraphicsPipeline();
bool updateGraphicsPipelineState();
bool updateGraphicsPipelineState(DxvkGlobalPipelineBarrier srcBarrier);
void invalidateState();

View File

@ -41,20 +41,6 @@ namespace dxvk {
};
/**
* \brief Render pass barrier
*
* External subpass dependency that is to be
* executed after a render pass has completed.
*/
struct DxvkRenderPassBarrier {
VkPipelineStageFlags srcStages = 0;
VkAccessFlags srcAccess = 0;
VkPipelineStageFlags dstStages = 0;
VkAccessFlags dstAccess = 0;
};
/**
* \brief Render pass transitions
*
@ -63,7 +49,6 @@ namespace dxvk {
* from a group of render passes with the same format.
*/
struct DxvkRenderPassOps {
DxvkRenderPassBarrier barrier;
DxvkDepthAttachmentOps depthOps;
DxvkColorAttachmentOps colorOps[MaxNumRenderTargets];
};