radv: Refactor predication for compute queues.

Initialize the inverted predication VA only when it is used
for the first time.

This is needed to get conditional rendering to work correctly with
task shaders because the internal compute cmdbuf may not exist
yet when conditional rendering starts.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16531>
This commit is contained in:
Timur Kristóf 2022-06-16 16:40:45 +02:00 committed by Marge Bot
parent 7131a05f92
commit e10cbb5d98
2 changed files with 67 additions and 59 deletions

View File

@ -533,6 +533,16 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
cmd_buffer->descriptors[i].push_dirty = false;
}
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
uint32_t pred_value = 0;
uint32_t pred_offset;
if (!radv_cmd_buffer_upload_data(cmd_buffer, 4, &pred_value, &pred_offset))
cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
cmd_buffer->mec_inv_pred_emitted = false;
cmd_buffer->mec_inv_pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
}
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 &&
cmd_buffer->qf == RADV_QUEUE_GENERAL) {
unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends;
@ -6301,18 +6311,56 @@ radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index)
* Emulates predication for MEC using COND_EXEC.
* When the current command buffer is predicating, emit a COND_EXEC packet
* so that the MEC skips the next few dwords worth of packets.
*
* To make it work with inverted conditional rendering, we allocate
* space in the upload BO and emit some packets to invert the condition.
*/
static void
radv_cs_emit_compute_predication(struct radv_cmd_buffer *cmd_buffer, unsigned dwords)
radv_cs_emit_compute_predication(struct radv_cmd_state *state, struct radeon_cmdbuf *cs,
uint64_t inv_va, bool *inv_emitted, unsigned dwords)
{
if (!cmd_buffer->state.predicating)
if (!state->predicating)
return;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint64_t va = state->predication_va;
if (!state->predication_type) {
/* Invert the condition the first time it is needed. */
if (!*inv_emitted) {
*inv_emitted = true;
/* Write 1 to the inverted predication VA. */
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
COPY_DATA_WR_CONFIRM);
radeon_emit(cs, 1);
radeon_emit(cs, 0);
radeon_emit(cs, inv_va);
radeon_emit(cs, inv_va >> 32);
/* If the API predication VA == 0, skip next command. */
radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, 0);
radeon_emit(cs, 6); /* 1x COPY_DATA size */
/* Write 0 to the new predication VA (when the API condition != 0) */
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
COPY_DATA_WR_CONFIRM);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, inv_va);
radeon_emit(cs, inv_va >> 32);
}
va = inv_va;
}
radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
radeon_emit(cs, cmd_buffer->state.predication_va);
radeon_emit(cs, cmd_buffer->state.predication_va >> 32);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, 0); /* Cache policy */
radeon_emit(cs, dwords); /* Size of the predicated packet(s) in DWORDs. */
}
@ -7649,7 +7697,9 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
}
if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
radv_cs_emit_compute_predication(cmd_buffer, 4 /* DISPATCH_INDIRECT size */);
radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va,
&cmd_buffer->mec_inv_pred_emitted,
4 /* DISPATCH_INDIRECT size */);
radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) | PKT3_SHADER_TYPE_S(1));
radeon_emit(cs, info->va);
radeon_emit(cs, info->va >> 32);
@ -7732,7 +7782,9 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
}
if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
radv_cs_emit_compute_predication(cmd_buffer, 5 /* DISPATCH_DIRECT size */);
radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va,
&cmd_buffer->mec_inv_pred_emitted,
5 /* DISPATCH_DIRECT size */);
predicating = false;
}
@ -9096,54 +9148,8 @@ radv_CmdBeginConditionalRenderingEXT(
pred_op = PREDICATION_OP_BOOL64;
}
if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
/* MEC does not support real predication, so we have to emulate that
* using COND_EXEC packets before each dispatch.
*/
if (!draw_visible) {
/* COND_EXEC can only skip the next packet(s) when the condition is 0.
* When using inverted conditional rendering, we allocate some space in
* the upload BO and emit some packets to invert the condition.
*/
uint64_t pred_value = 0, pred_va;
unsigned pred_offset;
radv_cmd_buffer_upload_data(cmd_buffer, 4, &pred_value, &pred_offset);
pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
/* Write 1 to the new predication VA. */
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
COPY_DATA_WR_CONFIRM);
radeon_emit(cs, 1);
radeon_emit(cs, 0);
radeon_emit(cs, pred_va);
radeon_emit(cs, pred_va >> 32);
/* If the API predication VA == 0, skip next command. */
radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, 0);
radeon_emit(cs, 6); /* 1x COPY_DATA size */
/* Write 0 to the new predication VA (when the API condition != 0) */
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
COPY_DATA_WR_CONFIRM);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, pred_va);
radeon_emit(cs, pred_va >> 32);
va = pred_va;
draw_visible = true;
}
} else {
/* Enable predication for this command buffer. */
/* MEC doesn't support predication, we emulate it elsewhere. */
if (!radv_cmd_buffer_uses_mec(cmd_buffer)) {
si_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va);
}
@ -9152,6 +9158,7 @@ radv_CmdBeginConditionalRenderingEXT(
cmd_buffer->state.predication_type = draw_visible;
cmd_buffer->state.predication_op = pred_op;
cmd_buffer->state.predication_va = va;
cmd_buffer->mec_inv_pred_emitted = false;
}
VKAPI_ATTR void VKAPI_CALL
@ -9159,11 +9166,8 @@ radv_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
/* Note, MEC doesn't support predication, so we
* don't have to emit anything for MEC here.
*/
/* MEC doesn't support predication, no need to emit anything here. */
if (!radv_cmd_buffer_uses_mec(cmd_buffer)) {
/* Disable predication for this command buffer. */
si_emit_set_predication_state(cmd_buffer, false, 0, 0);
}
@ -9172,6 +9176,7 @@ radv_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer)
cmd_buffer->state.predication_type = -1;
cmd_buffer->state.predication_op = 0;
cmd_buffer->state.predication_va = 0;
cmd_buffer->mec_inv_pred_emitted = false;
}
/* VK_EXT_transform_feedback */

View File

@ -1634,6 +1634,9 @@ struct radv_cmd_buffer {
uint32_t gfx9_fence_idx;
uint64_t gfx9_eop_bug_va;
uint64_t mec_inv_pred_va; /* For inverted predication when using MEC. */
bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */
/**
* Whether a query pool has been resetted and we have to flush caches.
*/