From 5f3d3be24a7d9fa79020877c259c0740cfba12d4 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 16 Feb 2022 19:03:48 +0100 Subject: [PATCH] radv: fix indirect dispatches on the compute queue on GFX10.3+ For weird reasons, the COPY_DATA packet doesn't seem to copy anything while on the compute queue. Instead, use PKT3_LOAD_SH_REG_INDEX which seems to work as expected. Note that LOAD_SH_REG_INDEX on the compute queue is only supported by the CP on GFX10.3+, so we need to implement a different solution (load from the indirect BO in the shader) for older generations. This should fix the Control RT GPU hang. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index e9eeb3cc3c4..534f8077600 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -7266,14 +7266,24 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, struct radv_pipel radv_cs_add_buffer(ws, cs, info->indirect); if (loc->sgpr_idx != -1) { - for (unsigned i = 0; i < 3; ++i) { - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, - COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG)); - radeon_emit(cs, (info->va + 4 * i)); - radeon_emit(cs, (info->va + 4 * i) >> 32); - radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4) >> 2) + i); - radeon_emit(cs, 0); + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10_3) { + unsigned reg = R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4; + + radeon_emit(cs, PKT3(PKT3_LOAD_SH_REG_INDEX, 3, 0)); + radeon_emit(cs, info->va); + radeon_emit(cs, info->va >> 32); + radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2); + radeon_emit(cs, 3); + } else { + for (unsigned i = 0; i < 3; ++i) { + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, + 
COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG)); + radeon_emit(cs, (info->va + 4 * i)); + radeon_emit(cs, (info->va + 4 * i) >> 32); + radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4) >> 2) + i); + radeon_emit(cs, 0); + } } }