radv: fix indirect dispatches on the compute queue on GFX10.3+
For reasons that are not yet understood, the COPY_DATA packet doesn't seem to copy anything when it is executed on the compute queue. Instead, use PKT3_LOAD_SH_REG_INDEX, which seems to work as expected. Note that LOAD_SH_REG_INDEX on the compute queue is only supported by the CP on GFX10.3+, so a different solution (loading from the indirect BO in the shader) still needs to be implemented for older generations; they keep using COPY_DATA for now. This should fix the Control RT GPU hang.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15053>
This commit is contained in:
parent
53ccfbb996
commit
5f3d3be24a
|
@ -7266,14 +7266,24 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, struct radv_pipel
|
|||
radv_cs_add_buffer(ws, cs, info->indirect);
|
||||
|
||||
if (loc->sgpr_idx != -1) {
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
|
||||
radeon_emit(cs,
|
||||
COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG));
|
||||
radeon_emit(cs, (info->va + 4 * i));
|
||||
radeon_emit(cs, (info->va + 4 * i) >> 32);
|
||||
radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4) >> 2) + i);
|
||||
radeon_emit(cs, 0);
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10_3) {
|
||||
unsigned reg = R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4;
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_LOAD_SH_REG_INDEX, 3, 0));
|
||||
radeon_emit(cs, info->va);
|
||||
radeon_emit(cs, info->va >> 32);
|
||||
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
|
||||
radeon_emit(cs, 3);
|
||||
} else {
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
|
||||
radeon_emit(cs,
|
||||
COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG));
|
||||
radeon_emit(cs, (info->va + 4 * i));
|
||||
radeon_emit(cs, (info->va + 4 * i) >> 32);
|
||||
radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4) >> 2) + i);
|
||||
radeon_emit(cs, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue