radv: add a new mechanism for tracking registers per cmdbuf

We already track a couple of registers per cmdbuf and this introduces
a generic mechanism, instead of having a bunch of last_xxx fields.

Loosely based on RadeonSI.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28644>
This commit is contained in:
Samuel Pitoiset 2024-04-08 18:48:46 +02:00 committed by Marge Bot
parent 39a9f68685
commit 1173058002
3 changed files with 87 additions and 64 deletions

View File

@ -399,6 +399,16 @@ radv_cmd_buffer_reset_rendering(struct radv_cmd_buffer *cmd_buffer)
memset(&cmd_buffer->state.render, 0, sizeof(cmd_buffer->state.render));
}
/**
 * Invalidate the register cache of a command buffer.
 *
 * Clears every bit of the saved mask, so no cached value is considered known,
 * and zeroes the cached values themselves.
 */
static void
radv_reset_tracked_regs(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_tracked_regs *const regs = &cmd_buffer->tracked_regs;

   /* A cleared saved bit is what marks a register as unknown. */
   BITSET_ZERO(regs->reg_saved_mask);

   /* Zero the whole value array (RADV_NUM_ALL_TRACKED_REGS entries). */
   memset(regs->reg_value, 0, sizeof(regs->reg_value));
}
static void
radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, UNUSED VkCommandBufferResetFlags flags)
{
@ -1638,12 +1648,8 @@ radv_emit_binning_state(struct radv_cmd_buffer *cmd_buffer)
pa_sc_binner_cntl_0 = radv_get_binning_state(cmd_buffer);
if (pa_sc_binner_cntl_0 == cmd_buffer->state.last_pa_sc_binner_cntl_0)
return;
radeon_set_context_reg(cmd_buffer->cs, R_028C44_PA_SC_BINNER_CNTL_0, pa_sc_binner_cntl_0);
cmd_buffer->state.last_pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
radeon_opt_set_context_reg(cmd_buffer, R_028C44_PA_SC_BINNER_CNTL_0, RADV_TRACKED_PA_SC_BINNER_CNTL_0,
pa_sc_binner_cntl_0);
}
static void
@ -1837,18 +1843,8 @@ radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
* breaks dual source blending in SkQP and does not seem to improve
* performance. */
if (sx_ps_downconvert != cmd_buffer->state.last_sx_ps_downconvert ||
sx_blend_opt_epsilon != cmd_buffer->state.last_sx_blend_opt_epsilon ||
sx_blend_opt_control != cmd_buffer->state.last_sx_blend_opt_control) {
radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
radeon_emit(cmd_buffer->cs, sx_blend_opt_control);
cmd_buffer->state.last_sx_ps_downconvert = sx_ps_downconvert;
cmd_buffer->state.last_sx_blend_opt_epsilon = sx_blend_opt_epsilon;
cmd_buffer->state.last_sx_blend_opt_control = sx_blend_opt_control;
}
radeon_opt_set_context_reg3(cmd_buffer, R_028754_SX_PS_DOWNCONVERT, RADV_TRACKED_SX_PS_DOWNCONVERT,
sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control);
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_RBPLUS;
}
@ -2525,10 +2521,8 @@ radv_emit_primitive_restart_enable(struct radv_cmd_buffer *cmd_buffer)
if (en && gfx_level <= GFX7) {
const uint32_t primitive_reset_index = radv_get_primitive_reset_index(cmd_buffer);
if (primitive_reset_index != cmd_buffer->state.last_primitive_reset_index) {
radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, primitive_reset_index);
cmd_buffer->state.last_primitive_reset_index = primitive_reset_index;
}
radeon_opt_set_context_reg(cmd_buffer, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
RADV_TRACKED_VGT_MULTI_PRIM_IB_RESET_INDX, primitive_reset_index);
}
}
}
@ -3879,13 +3873,7 @@ radv_flush_occlusion_query_state(struct radv_cmd_buffer *cmd_buffer)
}
}
if (db_count_control != cmd_buffer->state.last_db_count_control) {
radeon_set_context_reg(cmd_buffer->cs, R_028004_DB_COUNT_CONTROL, db_count_control);
cmd_buffer->state.context_roll_without_scissor_emitted = true;
cmd_buffer->state.last_db_count_control = db_count_control;
}
radeon_opt_set_context_reg(cmd_buffer, R_028004_DB_COUNT_CONTROL, RADV_TRACKED_DB_COUNT_CONTROL, db_count_control);
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_OCCLUSION_QUERY;
}
@ -6073,15 +6061,12 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi
cmd_buffer->state.last_drawid = -1;
cmd_buffer->state.last_subpass_color_count = MAX_RTS;
cmd_buffer->state.predication_type = -1;
cmd_buffer->state.last_sx_ps_downconvert = -1;
cmd_buffer->state.last_sx_blend_opt_epsilon = -1;
cmd_buffer->state.last_sx_blend_opt_control = -1;
cmd_buffer->state.mesh_shading = false;
cmd_buffer->state.last_vrs_rates = -1;
cmd_buffer->state.last_vrs_rates_sgpr_idx = -1;
cmd_buffer->state.last_pa_sc_binner_cntl_0 = -1;
cmd_buffer->state.last_db_count_control = -1;
cmd_buffer->state.last_db_shader_control = -1;
radv_reset_tracked_regs(cmd_buffer);
cmd_buffer->usage_flags = pBeginInfo->flags;
cmd_buffer->state.dirty |=
@ -8076,10 +8061,6 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
primary->state.emitted_compute_pipeline = secondary->state.emitted_compute_pipeline;
}
if (secondary->state.last_primitive_reset_index) {
primary->state.last_primitive_reset_index = secondary->state.last_primitive_reset_index;
}
if (secondary->state.last_ia_multi_vgt_param) {
primary->state.last_ia_multi_vgt_param = secondary->state.last_ia_multi_vgt_param;
}
@ -8090,9 +8071,6 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
primary->state.last_num_instances = secondary->state.last_num_instances;
primary->state.last_subpass_color_count = secondary->state.last_subpass_color_count;
primary->state.last_sx_ps_downconvert = secondary->state.last_sx_ps_downconvert;
primary->state.last_sx_blend_opt_epsilon = secondary->state.last_sx_blend_opt_epsilon;
primary->state.last_sx_blend_opt_control = secondary->state.last_sx_blend_opt_control;
if (secondary->state.last_index_type != -1) {
primary->state.last_index_type = secondary->state.last_index_type;
@ -8101,13 +8079,17 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
primary->state.last_vrs_rates = secondary->state.last_vrs_rates;
primary->state.last_vrs_rates_sgpr_idx = secondary->state.last_vrs_rates_sgpr_idx;
primary->state.last_pa_sc_binner_cntl_0 = secondary->state.last_pa_sc_binner_cntl_0;
primary->state.last_db_shader_control = secondary->state.last_db_shader_control;
primary->state.rb_noncoherent_dirty |= secondary->state.rb_noncoherent_dirty;
primary->state.uses_draw_indirect |= secondary->state.uses_draw_indirect;
for (uint32_t reg = 0; reg < RADV_NUM_ALL_TRACKED_REGS; reg++) {
if (!BITSET_TEST(secondary->tracked_regs.reg_saved_mask, reg))
continue;
BITSET_SET(primary->tracked_regs.reg_saved_mask, reg);
primary->tracked_regs.reg_value[reg] = secondary->tracked_regs.reg_value[reg];
}
}
/* After executing commands from secondary buffers we have to dirty
@ -8123,7 +8105,6 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
primary->state.last_first_instance = -1;
primary->state.last_drawid = -1;
primary->state.last_vertex_offset_valid = false;
primary->state.last_db_count_control = -1;
}
static void
@ -9445,11 +9426,8 @@ radv_emit_db_shader_control(struct radv_cmd_buffer *cmd_buffer)
}
}
if (db_shader_control != cmd_buffer->state.last_db_shader_control) {
radeon_set_context_reg(cmd_buffer->cs, R_02880C_DB_SHADER_CONTROL, db_shader_control);
cmd_buffer->state.last_db_shader_control = db_shader_control;
}
radeon_opt_set_context_reg(cmd_buffer, R_02880C_DB_SHADER_CONTROL, RADV_TRACKED_DB_SHADER_CONTROL,
db_shader_control);
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_DB_SHADER_CONTROL;
}

View File

@ -290,6 +290,27 @@ enum rgp_flush_bits {
RGP_FLUSH_INVAL_L1 = 0x8000,
};
/**
 * Slots of the per-command-buffer register cache (struct radv_tracked_regs).
 *
 * Each enumerator is an index into both the saved-bit mask and the value
 * array of that cache.
 */
enum radv_tracked_reg {
   RADV_TRACKED_DB_COUNT_CONTROL = 0,
   RADV_TRACKED_DB_SHADER_CONTROL,
   RADV_TRACKED_PA_SC_BINNER_CNTL_0,

   /* The next three slots must stay adjacent and in this order:
    * radeon_opt_set_context_reg3() addresses them as reg_enum, reg_enum + 1
    * and reg_enum + 2.
    */
   RADV_TRACKED_SX_PS_DOWNCONVERT,
   RADV_TRACKED_SX_BLEND_OPT_EPSILON,
   RADV_TRACKED_SX_BLEND_OPT_CONTROL,

   /* Only relevant on GFX6-7. */
   RADV_TRACKED_VGT_MULTI_PRIM_IB_RESET_INDX,

   /* Slot count; keep last. */
   RADV_NUM_ALL_TRACKED_REGS,
};
/* Cache of the last value written to each tracked register, indexed by
 * enum radv_tracked_reg. Embedded in struct radv_cmd_buffer.
 */
struct radv_tracked_regs {
/* One bit per tracked register; a clear bit means the cached value is not
 * known and the register has to be emitted regardless of reg_value.
 */
BITSET_DECLARE(reg_saved_mask, RADV_NUM_ALL_TRACKED_REGS);
/* Last value recorded for each tracked register; only meaningful while the
 * matching bit in reg_saved_mask is set.
 */
uint32_t reg_value[RADV_NUM_ALL_TRACKED_REGS];
};
struct radv_cmd_state {
/* Vertex descriptors */
uint64_t vb_va;
@ -326,7 +347,6 @@ struct radv_cmd_state {
uint64_t index_va;
int32_t last_index_type;
uint32_t last_primitive_reset_index; /* only relevant on GFX6-7 */
enum radv_cmd_flush_bits flush_bits;
unsigned active_occlusion_queries;
bool perfect_occlusion_queries_enabled;
@ -348,14 +368,6 @@ struct radv_cmd_state {
uint32_t last_drawid;
uint32_t last_subpass_color_count;
uint32_t last_sx_ps_downconvert;
uint32_t last_sx_blend_opt_epsilon;
uint32_t last_sx_blend_opt_control;
uint32_t last_db_count_control;
uint32_t last_db_shader_control;
/* Whether CP DMA is busy/idle. */
bool dma_is_busy;
@ -424,9 +436,6 @@ struct radv_cmd_state {
unsigned spi_shader_col_format;
unsigned cb_shader_mask;
/* Binning state */
unsigned last_pa_sc_binner_cntl_0;
struct radv_multisample_state ms;
/* Custom blend mode for internal operations. */
@ -472,6 +481,8 @@ struct radv_cmd_buffer_upload {
struct radv_cmd_buffer {
struct vk_command_buffer vk;
struct radv_tracked_regs tracked_regs;
VkCommandBufferUsageFlags usage_flags;
struct radeon_cmdbuf *cs;
struct radv_cmd_state state;

View File

@ -193,6 +193,40 @@ radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigne
radeon_emit(cs, 0); /* unused */
}
/**
 * Set one context register unless the tracked value already matches.
 *
 * cmdbuf   - struct radv_cmd_buffer * whose tracked_regs cache is consulted.
 * reg      - register offset forwarded to radeon_set_context_reg().
 * reg_enum - enum radv_tracked_reg slot that caches this register.
 * value    - 32-bit value to program.
 *
 * The register is emitted when its saved bit is clear (value unknown) or when
 * the cached value differs. On emission the slot is marked saved, the cache is
 * updated and state.context_roll_without_scissor_emitted is set.
 *
 * Every argument is evaluated exactly once, so expressions with side effects
 * are safe to pass.
 */
#define radeon_opt_set_context_reg(cmdbuf, reg, reg_enum, value)                                                     \
   do {                                                                                                              \
      struct radv_cmd_buffer *__cmdbuf = (cmdbuf);                                                                   \
      struct radv_tracked_regs *__tracked_regs = &__cmdbuf->tracked_regs;                                            \
      const uint32_t __value = (value);                                                                              \
      const unsigned __reg_idx = (reg_enum);                                                                         \
      if (!BITSET_TEST(__tracked_regs->reg_saved_mask, __reg_idx) ||                                                 \
          __tracked_regs->reg_value[__reg_idx] != __value) {                                                         \
         radeon_set_context_reg(__cmdbuf->cs, (reg), __value);                                                       \
         BITSET_SET(__tracked_regs->reg_saved_mask, __reg_idx);                                                      \
         __tracked_regs->reg_value[__reg_idx] = __value;                                                             \
         __cmdbuf->state.context_roll_without_scissor_emitted = true;                                                \
      }                                                                                                              \
   } while (0)
/**
 * Set three consecutive context registers unless all tracked values already
 * match.
 *
 * cmdbuf     - struct radv_cmd_buffer * whose tracked_regs cache is consulted.
 * reg        - offset of the first register, forwarded to
 *              radeon_set_context_reg_seq() with a count of 3.
 * reg_enum   - enum radv_tracked_reg slot of the first register; the macro
 *              also uses slots reg_enum + 1 and reg_enum + 2, so the three
 *              enumerators must be consecutive.
 * v1, v2, v3 - 32-bit values for the three registers, in order.
 *
 * All three registers are emitted when any of the three saved bits is clear or
 * any cached value differs. On emission the three slots are marked saved, the
 * cache is updated and state.context_roll_without_scissor_emitted is set.
 *
 * NOTE(review): the *_INSIDE_WORD bitset helpers presumably require the three
 * slots to live in the same bitset word - confirm against util/bitset.h.
 *
 * Every argument is evaluated exactly once; in particular the body only goes
 * through the captured __cmdbuf, so arguments such as `&cmd` expand correctly.
 */
#define radeon_opt_set_context_reg3(cmdbuf, reg, reg_enum, v1, v2, v3)                                               \
   do {                                                                                                              \
      struct radv_cmd_buffer *__cmdbuf = (cmdbuf);                                                                   \
      struct radv_tracked_regs *__tracked_regs = &__cmdbuf->tracked_regs;                                            \
      const uint32_t __v1 = (v1), __v2 = (v2), __v3 = (v3);                                                          \
      const unsigned __reg_idx = (reg_enum);                                                                         \
      if (!BITSET_TEST_RANGE_INSIDE_WORD(__tracked_regs->reg_saved_mask, __reg_idx, __reg_idx + 2, 0x7) ||           \
          __tracked_regs->reg_value[__reg_idx] != __v1 || __tracked_regs->reg_value[__reg_idx + 1] != __v2 ||        \
          __tracked_regs->reg_value[__reg_idx + 2] != __v3) {                                                        \
         radeon_set_context_reg_seq(__cmdbuf->cs, (reg), 3);                                                         \
         radeon_emit(__cmdbuf->cs, __v1);                                                                            \
         radeon_emit(__cmdbuf->cs, __v2);                                                                            \
         radeon_emit(__cmdbuf->cs, __v3);                                                                            \
         BITSET_SET_RANGE_INSIDE_WORD(__tracked_regs->reg_saved_mask, __reg_idx, __reg_idx + 2);                     \
         __tracked_regs->reg_value[__reg_idx] = __v1;                                                                \
         __tracked_regs->reg_value[__reg_idx + 1] = __v2;                                                            \
         __tracked_regs->reg_value[__reg_idx + 2] = __v3;                                                            \
         __cmdbuf->state.context_roll_without_scissor_emitted = true;                                                \
      }                                                                                                              \
   } while (0)
ALWAYS_INLINE static void
radv_cp_wait_mem(struct radeon_cmdbuf *cs, const enum radv_queue_family qf, const uint32_t op, const uint64_t va,
const uint32_t ref, const uint32_t mask)