radv: implement vkCmdWaitEvents2KHR()/vkCmdPipelineBarrier2KHR()

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13549>
This commit is contained in:
Samuel Pitoiset 2021-10-12 18:41:34 +02:00 committed by Marge Bot
parent 57575974fd
commit 8df17163c7
4 changed files with 122 additions and 133 deletions

View File

@@ -563,31 +563,18 @@ sqtt_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
} }
VKAPI_ATTR void VKAPI_CALL VKAPI_ATTR void VKAPI_CALL
sqtt_CmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents, sqtt_CmdWaitEvents2KHR(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents,
VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, const VkDependencyInfoKHR* pDependencyInfos)
uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier *pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier *pImageMemoryBarriers)
{ {
EVENT_MARKER(WaitEvents, commandBuffer, eventCount, pEvents, srcStageMask, dstStageMask, EVENT_MARKER_ALIAS(WaitEvents2KHR, WaitEvents, commandBuffer, eventCount, pEvents,
memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount, pDependencyInfos);
pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers);
} }
VKAPI_ATTR void VKAPI_CALL VKAPI_ATTR void VKAPI_CALL
sqtt_CmdPipelineBarrier(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, sqtt_CmdPipelineBarrier2KHR(VkCommandBuffer commandBuffer,
VkPipelineStageFlags destStageMask, VkBool32 byRegion, const VkDependencyInfoKHR* pDependencyInfo)
uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier *pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier *pImageMemoryBarriers)
{ {
EVENT_MARKER(PipelineBarrier, commandBuffer, srcStageMask, destStageMask, byRegion, EVENT_MARKER_ALIAS(PipelineBarrier2KHR, PipelineBarrier, commandBuffer, pDependencyInfo);
memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount,
pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers);
} }
VKAPI_ATTR void VKAPI_CALL VKAPI_ATTR void VKAPI_CALL

View File

@@ -34,6 +34,7 @@
#include "sid.h" #include "sid.h"
#include "vk_format.h" #include "vk_format.h"
#include "vk_util.h" #include "vk_util.h"
#include "vk_enum_defines.h"
#include "ac_debug.h" #include "ac_debug.h"
@@ -3725,29 +3726,29 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_d
} }
static void static void
radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags src_stage_mask) radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2KHR src_stage_mask)
{ {
if (src_stage_mask & if (src_stage_mask &
(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT | (VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR | VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR |
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR |
VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR |
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) { VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH; cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
} }
if (src_stage_mask & if (src_stage_mask &
(VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | (VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR | VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR | VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR |
VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR |
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) { VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT_KHR | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH; cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
} else if (src_stage_mask & } else if (src_stage_mask &
(VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | (VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT_KHR | VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT_KHR |
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT_KHR |
VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT_KHR |
VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT_KHR |
VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT_KHR |
VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT)) { VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH; cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH;
} }
} }
@@ -3790,7 +3791,7 @@ can_skip_buffer_l2_flushes(struct radv_device *device)
*/ */
enum radv_cmd_flush_bits enum radv_cmd_flush_bits
radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags src_flags, radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2KHR src_flags,
const struct radv_image *image) const struct radv_image *image)
{ {
bool has_CB_meta = true, has_DB_meta = true; bool has_CB_meta = true, has_DB_meta = true;
@@ -3804,10 +3805,10 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags src_flag
has_DB_meta = false; has_DB_meta = false;
} }
u_foreach_bit(b, src_flags) u_foreach_bit64(b, src_flags)
{ {
switch ((VkAccessFlagBits)(1 << b)) { switch ((VkAccessFlags2KHR)(1 << b)) {
case VK_ACCESS_SHADER_WRITE_BIT: case VK_ACCESS_2_SHADER_WRITE_BIT_KHR:
/* since the STORAGE bit isn't set we know that this is a meta operation. /* since the STORAGE bit isn't set we know that this is a meta operation.
* on the dst flush side we skip CB/DB flushes without the STORAGE bit, so * on the dst flush side we skip CB/DB flushes without the STORAGE bit, so
* set it here. */ * set it here. */
@@ -3825,23 +3826,23 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags src_flag
if (!image_is_coherent) if (!image_is_coherent)
flush_bits |= RADV_CMD_FLAG_WB_L2; flush_bits |= RADV_CMD_FLAG_WB_L2;
break; break;
case VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR: case VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR:
case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT: case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT: case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
if (!image_is_coherent) if (!image_is_coherent)
flush_bits |= RADV_CMD_FLAG_WB_L2; flush_bits |= RADV_CMD_FLAG_WB_L2;
break; break;
case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR:
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
if (has_CB_meta) if (has_CB_meta)
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
break; break;
case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT_KHR:
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
if (has_DB_meta) if (has_DB_meta)
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
break; break;
case VK_ACCESS_TRANSFER_WRITE_BIT: case VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR:
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB; flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
if (!image_is_coherent) if (!image_is_coherent)
@@ -3851,7 +3852,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags src_flag
if (has_DB_meta) if (has_DB_meta)
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
break; break;
case VK_ACCESS_MEMORY_WRITE_BIT: case VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB; flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
if (!image_is_coherent) if (!image_is_coherent)
@@ -3869,7 +3870,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags src_flag
} }
enum radv_cmd_flush_bits enum radv_cmd_flush_bits
radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags dst_flags, radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2KHR dst_flags,
const struct radv_image *image) const struct radv_image *image)
{ {
bool has_CB_meta = true, has_DB_meta = true; bool has_CB_meta = true, has_DB_meta = true;
@@ -3894,20 +3895,20 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags dst_flag
image_is_coherent |= image_is_coherent |=
can_skip_buffer_l2_flushes(cmd_buffer->device) && !cmd_buffer->state.rb_noncoherent_dirty; can_skip_buffer_l2_flushes(cmd_buffer->device) && !cmd_buffer->state.rb_noncoherent_dirty;
u_foreach_bit(b, dst_flags) u_foreach_bit64(b, dst_flags)
{ {
switch ((VkAccessFlagBits)(1 << b)) { switch ((VkAccessFlags2KHR)(1 << b)) {
case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT_KHR:
case VK_ACCESS_INDEX_READ_BIT: case VK_ACCESS_2_INDEX_READ_BIT_KHR:
case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT: case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
break; break;
case VK_ACCESS_UNIFORM_READ_BIT: case VK_ACCESS_2_UNIFORM_READ_BIT_KHR:
flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE; flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE;
break; break;
case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT_KHR:
case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT: case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT_KHR:
case VK_ACCESS_TRANSFER_READ_BIT: case VK_ACCESS_2_TRANSFER_READ_BIT_KHR:
case VK_ACCESS_TRANSFER_WRITE_BIT: case VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR:
flush_bits |= RADV_CMD_FLAG_INV_VCACHE; flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
if (has_CB_meta || has_DB_meta) if (has_CB_meta || has_DB_meta)
@@ -3915,7 +3916,7 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags dst_flag
if (!image_is_coherent) if (!image_is_coherent)
flush_bits |= RADV_CMD_FLAG_INV_L2; flush_bits |= RADV_CMD_FLAG_INV_L2;
break; break;
case VK_ACCESS_SHADER_READ_BIT: case VK_ACCESS_2_SHADER_READ_BIT_KHR:
flush_bits |= RADV_CMD_FLAG_INV_VCACHE; flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
/* Unlike LLVM, ACO uses SMEM for SSBOs and we have to /* Unlike LLVM, ACO uses SMEM for SSBOs and we have to
* invalidate the scalar cache. */ * invalidate the scalar cache. */
@@ -3927,30 +3928,30 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags dst_flag
if (!image_is_coherent) if (!image_is_coherent)
flush_bits |= RADV_CMD_FLAG_INV_L2; flush_bits |= RADV_CMD_FLAG_INV_L2;
break; break;
case VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR: case VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR:
flush_bits |= RADV_CMD_FLAG_INV_VCACHE; flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9) if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9)
flush_bits |= RADV_CMD_FLAG_INV_L2; flush_bits |= RADV_CMD_FLAG_INV_L2;
break; break;
case VK_ACCESS_SHADER_WRITE_BIT: case VK_ACCESS_2_SHADER_WRITE_BIT_KHR:
case VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR: case VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR:
break; break;
case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: case VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT_KHR:
case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR:
if (flush_CB) if (flush_CB)
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
if (has_CB_meta) if (has_CB_meta)
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
break; break;
case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT: case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT_KHR:
case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT_KHR:
if (flush_DB) if (flush_DB)
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
if (has_DB_meta) if (has_DB_meta)
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
break; break;
case VK_ACCESS_MEMORY_READ_BIT: case VK_ACCESS_2_MEMORY_READ_BIT_KHR:
case VK_ACCESS_MEMORY_WRITE_BIT: case VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE; flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE;
if (!image_is_coherent) if (!image_is_coherent)
flush_bits |= RADV_CMD_FLAG_INV_L2; flush_bits |= RADV_CMD_FLAG_INV_L2;
@@ -7664,19 +7665,17 @@ struct radv_barrier_info {
enum rgp_barrier_reason reason; enum rgp_barrier_reason reason;
uint32_t eventCount; uint32_t eventCount;
const VkEvent *pEvents; const VkEvent *pEvents;
VkPipelineStageFlags srcStageMask;
VkPipelineStageFlags dstStageMask;
}; };
static void static void
radv_barrier(struct radv_cmd_buffer *cmd_buffer, uint32_t memoryBarrierCount, radv_barrier(struct radv_cmd_buffer *cmd_buffer, const VkDependencyInfoKHR *dep_info,
const VkMemoryBarrier *pMemoryBarriers, uint32_t bufferMemoryBarrierCount, const struct radv_barrier_info *info)
const VkBufferMemoryBarrier *pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier *pImageMemoryBarriers, const struct radv_barrier_info *info)
{ {
struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radeon_cmdbuf *cs = cmd_buffer->cs;
enum radv_cmd_flush_bits src_flush_bits = 0; enum radv_cmd_flush_bits src_flush_bits = 0;
enum radv_cmd_flush_bits dst_flush_bits = 0; enum radv_cmd_flush_bits dst_flush_bits = 0;
VkPipelineStageFlags2KHR src_stage_mask = 0;
VkPipelineStageFlags2KHR dst_stage_mask = 0;
if (cmd_buffer->state.subpass) if (cmd_buffer->state.subpass)
radv_mark_noncoherent_rb(cmd_buffer); radv_mark_noncoherent_rb(cmd_buffer);
@@ -7695,47 +7694,55 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer, uint32_t memoryBarrierCount,
assert(cmd_buffer->cs->cdw <= cdw_max); assert(cmd_buffer->cs->cdw <= cdw_max);
} }
for (uint32_t i = 0; i < memoryBarrierCount; i++) { for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) {
src_flush_bits |= radv_src_access_flush(cmd_buffer, pMemoryBarriers[i].srcAccessMask, NULL); src_stage_mask |= dep_info->pMemoryBarriers[i].srcStageMask;
dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pMemoryBarriers[i].dstAccessMask, NULL); src_flush_bits |=
radv_src_access_flush(cmd_buffer, dep_info->pMemoryBarriers[i].srcAccessMask, NULL);
dst_stage_mask |= dep_info->pMemoryBarriers[i].dstStageMask;
dst_flush_bits |=
radv_dst_access_flush(cmd_buffer, dep_info->pMemoryBarriers[i].dstAccessMask, NULL);
} }
for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) { for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) {
src_stage_mask |= dep_info->pBufferMemoryBarriers[i].srcStageMask;
src_flush_bits |= src_flush_bits |=
radv_src_access_flush(cmd_buffer, pBufferMemoryBarriers[i].srcAccessMask, NULL); radv_src_access_flush(cmd_buffer, dep_info->pBufferMemoryBarriers[i].srcAccessMask, NULL);
dst_stage_mask |= dep_info->pBufferMemoryBarriers[i].dstStageMask;
dst_flush_bits |= dst_flush_bits |=
radv_dst_access_flush(cmd_buffer, pBufferMemoryBarriers[i].dstAccessMask, NULL); radv_dst_access_flush(cmd_buffer, dep_info->pBufferMemoryBarriers[i].dstAccessMask, NULL);
} }
for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image); RADV_FROM_HANDLE(radv_image, image, dep_info->pImageMemoryBarriers[i].image);
src_stage_mask |= dep_info->pImageMemoryBarriers[i].srcStageMask;
src_flush_bits |= src_flush_bits |=
radv_src_access_flush(cmd_buffer, pImageMemoryBarriers[i].srcAccessMask, image); radv_src_access_flush(cmd_buffer, dep_info->pImageMemoryBarriers[i].srcAccessMask, image);
dst_stage_mask |= dep_info->pImageMemoryBarriers[i].dstStageMask;
dst_flush_bits |= dst_flush_bits |=
radv_dst_access_flush(cmd_buffer, pImageMemoryBarriers[i].dstAccessMask, image); radv_dst_access_flush(cmd_buffer, dep_info->pImageMemoryBarriers[i].dstAccessMask, image);
} }
/* The Vulkan spec 1.1.98 says: /* The Vulkan spec 1.1.98 says:
* *
* "An execution dependency with only * "An execution dependency with only
* VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT in the destination stage mask * VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR in the destination stage mask
* will only prevent that stage from executing in subsequently * will only prevent that stage from executing in subsequently
* submitted commands. As this stage does not perform any actual * submitted commands. As this stage does not perform any actual
* execution, this is not observable - in effect, it does not delay * execution, this is not observable - in effect, it does not delay
* processing of subsequent commands. Similarly an execution dependency * processing of subsequent commands. Similarly an execution dependency
* with only VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT in the source stage mask * with only VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR in the source stage mask
* will effectively not wait for any prior commands to complete." * will effectively not wait for any prior commands to complete."
*/ */
if (info->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT) if (dst_stage_mask != VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR)
radv_stage_flush(cmd_buffer, info->srcStageMask); radv_stage_flush(cmd_buffer, src_stage_mask);
cmd_buffer->state.flush_bits |= src_flush_bits; cmd_buffer->state.flush_bits |= src_flush_bits;
for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image); RADV_FROM_HANDLE(radv_image, image, dep_info->pImageMemoryBarriers[i].image);
const struct VkSampleLocationsInfoEXT *sample_locs_info = const struct VkSampleLocationsInfoEXT *sample_locs_info =
vk_find_struct_const(pImageMemoryBarriers[i].pNext, SAMPLE_LOCATIONS_INFO_EXT); vk_find_struct_const(dep_info->pImageMemoryBarriers[i].pNext, SAMPLE_LOCATIONS_INFO_EXT);
struct radv_sample_locations_state sample_locations = {0}; struct radv_sample_locations_state sample_locations = {0};
if (sample_locs_info) { if (sample_locs_info) {
@@ -7748,18 +7755,20 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer, uint32_t memoryBarrierCount,
} }
radv_handle_image_transition( radv_handle_image_transition(
cmd_buffer, image, pImageMemoryBarriers[i].oldLayout, cmd_buffer, image, dep_info->pImageMemoryBarriers[i].oldLayout,
false, /* Outside of a renderpass we are never in a renderloop */ false, /* Outside of a renderpass we are never in a renderloop */
pImageMemoryBarriers[i].newLayout, dep_info->pImageMemoryBarriers[i].newLayout,
false, /* Outside of a renderpass we are never in a renderloop */ false, /* Outside of a renderpass we are never in a renderloop */
pImageMemoryBarriers[i].srcQueueFamilyIndex, pImageMemoryBarriers[i].dstQueueFamilyIndex, dep_info->pImageMemoryBarriers[i].srcQueueFamilyIndex,
&pImageMemoryBarriers[i].subresourceRange, sample_locs_info ? &sample_locations : NULL); dep_info->pImageMemoryBarriers[i].dstQueueFamilyIndex,
&dep_info->pImageMemoryBarriers[i].subresourceRange, sample_locs_info ? &sample_locations : NULL);
} }
/* Make sure CP DMA is idle because the driver might have performed a /* Make sure CP DMA is idle because the driver might have performed a
* DMA operation for copying or filling buffers/images. * DMA operation for copying or filling buffers/images.
*/ */
if (info->srcStageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)) if (src_stage_mask & (VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR |
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR))
si_cp_dma_wait_for_idle(cmd_buffer); si_cp_dma_wait_for_idle(cmd_buffer);
cmd_buffer->state.flush_bits |= dst_flush_bits; cmd_buffer->state.flush_bits |= dst_flush_bits;
@@ -7768,13 +7777,8 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer, uint32_t memoryBarrierCount,
} }
VKAPI_ATTR void VKAPI_CALL VKAPI_ATTR void VKAPI_CALL
radv_CmdPipelineBarrier(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, radv_CmdPipelineBarrier2KHR(VkCommandBuffer commandBuffer,
VkPipelineStageFlags destStageMask, VkBool32 byRegion, const VkDependencyInfoKHR *pDependencyInfo)
uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier *pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier *pImageMemoryBarriers)
{ {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_barrier_info info; struct radv_barrier_info info;
@@ -7782,11 +7786,8 @@ radv_CmdPipelineBarrier(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcS
info.reason = RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER; info.reason = RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER;
info.eventCount = 0; info.eventCount = 0;
info.pEvents = NULL; info.pEvents = NULL;
info.srcStageMask = srcStageMask;
info.dstStageMask = destStageMask;
radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount, radv_barrier(cmd_buffer, pDependencyInfo, &info);
pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers, &info);
} }
static void static void
@@ -7893,13 +7894,8 @@ radv_CmdResetEvent2KHR(VkCommandBuffer commandBuffer, VkEvent _event,
} }
VKAPI_ATTR void VKAPI_CALL VKAPI_ATTR void VKAPI_CALL
radv_CmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents, radv_CmdWaitEvents2KHR(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, const VkDependencyInfoKHR* pDependencyInfos)
uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier *pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier *pImageMemoryBarriers)
{ {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_barrier_info info; struct radv_barrier_info info;
@@ -7907,10 +7903,8 @@ radv_CmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkE
info.reason = RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS; info.reason = RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS;
info.eventCount = eventCount; info.eventCount = eventCount;
info.pEvents = pEvents; info.pEvents = pEvents;
info.srcStageMask = 0;
radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount, radv_barrier(cmd_buffer, pDependencyInfos, &info);
pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers, &info);
} }
VKAPI_ATTR void VKAPI_CALL VKAPI_ATTR void VKAPI_CALL

View File

@@ -911,18 +911,22 @@ radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer, struct radv_imag
const uint32_t src_base_layer = const uint32_t src_base_layer =
radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset); radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
VkImageMemoryBarrier barrier = {0}; VkImageMemoryBarrier2KHR barrier = {
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR,
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR,
barrier.oldLayout = src_image_layout; .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; .dstStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR,
barrier.image = radv_image_to_handle(src_image); .dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT_KHR,
barrier.subresourceRange = (VkImageSubresourceRange){ .oldLayout = src_image_layout,
.aspectMask = region->srcSubresource.aspectMask, .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.baseMipLevel = region->srcSubresource.mipLevel, .image = radv_image_to_handle(src_image),
.levelCount = 1, .subresourceRange = (VkImageSubresourceRange){
.baseArrayLayer = src_base_layer, .aspectMask = region->srcSubresource.aspectMask,
.layerCount = region->srcSubresource.layerCount, .baseMipLevel = region->srcSubresource.mipLevel,
.levelCount = 1,
.baseArrayLayer = src_base_layer,
.layerCount = region->srcSubresource.layerCount,
}
}; };
if (src_image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT) { if (src_image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT) {
@@ -941,7 +945,11 @@ radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer, struct radv_imag
}; };
} }
radv_CmdPipelineBarrier(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, struct VkDependencyInfoKHR dep_info = {
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, false, 0, NULL, 0, NULL, 1, .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
&barrier); .imageMemoryBarrierCount = 1,
.pImageMemoryBarriers = &barrier,
};
radv_CmdPipelineBarrier2KHR(radv_cmd_buffer_to_handle(cmd_buffer), &dep_info);
} }

View File

@@ -1618,10 +1618,10 @@ void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_im
void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *range, bool value); const VkImageSubresourceRange *range, bool value);
enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
VkAccessFlags src_flags, VkAccessFlags2KHR src_flags,
const struct radv_image *image); const struct radv_image *image);
enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
VkAccessFlags dst_flags, VkAccessFlags2KHR dst_flags,
const struct radv_image *image); const struct radv_image *image);
uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size,