mirror of https://gitlab.freedesktop.org/mesa/mesa
radv: add a new mechanism for tracking registers per cmdbuf
We already track a couple of registers per cmdbuf; this introduces a generic mechanism to replace the bunch of ad-hoc last_xxx fields. Loosely based on RadeonSI.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28644>
This commit is contained in:
parent
39a9f68685
commit
1173058002
|
@ -399,6 +399,16 @@ radv_cmd_buffer_reset_rendering(struct radv_cmd_buffer *cmd_buffer)
|
|||
memset(&cmd_buffer->state.render, 0, sizeof(cmd_buffer->state.render));
|
||||
}
|
||||
|
||||
static void
|
||||
radv_reset_tracked_regs(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct radv_tracked_regs *tracked_regs = &cmd_buffer->tracked_regs;
|
||||
|
||||
/* Mark all registers as unknown. */
|
||||
memset(tracked_regs->reg_value, 0, RADV_NUM_ALL_TRACKED_REGS * sizeof(uint32_t));
|
||||
BITSET_ZERO(tracked_regs->reg_saved_mask);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, UNUSED VkCommandBufferResetFlags flags)
|
||||
{
|
||||
|
@ -1638,12 +1648,8 @@ radv_emit_binning_state(struct radv_cmd_buffer *cmd_buffer)
|
|||
|
||||
pa_sc_binner_cntl_0 = radv_get_binning_state(cmd_buffer);
|
||||
|
||||
if (pa_sc_binner_cntl_0 == cmd_buffer->state.last_pa_sc_binner_cntl_0)
|
||||
return;
|
||||
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028C44_PA_SC_BINNER_CNTL_0, pa_sc_binner_cntl_0);
|
||||
|
||||
cmd_buffer->state.last_pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
|
||||
radeon_opt_set_context_reg(cmd_buffer, R_028C44_PA_SC_BINNER_CNTL_0, RADV_TRACKED_PA_SC_BINNER_CNTL_0,
|
||||
pa_sc_binner_cntl_0);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -1837,18 +1843,8 @@ radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
|
|||
* breaks dual source blending in SkQP and does not seem to improve
|
||||
* performance. */
|
||||
|
||||
if (sx_ps_downconvert != cmd_buffer->state.last_sx_ps_downconvert ||
|
||||
sx_blend_opt_epsilon != cmd_buffer->state.last_sx_blend_opt_epsilon ||
|
||||
sx_blend_opt_control != cmd_buffer->state.last_sx_blend_opt_control) {
|
||||
radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
|
||||
radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
|
||||
radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
|
||||
radeon_emit(cmd_buffer->cs, sx_blend_opt_control);
|
||||
|
||||
cmd_buffer->state.last_sx_ps_downconvert = sx_ps_downconvert;
|
||||
cmd_buffer->state.last_sx_blend_opt_epsilon = sx_blend_opt_epsilon;
|
||||
cmd_buffer->state.last_sx_blend_opt_control = sx_blend_opt_control;
|
||||
}
|
||||
radeon_opt_set_context_reg3(cmd_buffer, R_028754_SX_PS_DOWNCONVERT, RADV_TRACKED_SX_PS_DOWNCONVERT,
|
||||
sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control);
|
||||
|
||||
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_RBPLUS;
|
||||
}
|
||||
|
@ -2525,10 +2521,8 @@ radv_emit_primitive_restart_enable(struct radv_cmd_buffer *cmd_buffer)
|
|||
if (en && gfx_level <= GFX7) {
|
||||
const uint32_t primitive_reset_index = radv_get_primitive_reset_index(cmd_buffer);
|
||||
|
||||
if (primitive_reset_index != cmd_buffer->state.last_primitive_reset_index) {
|
||||
radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, primitive_reset_index);
|
||||
cmd_buffer->state.last_primitive_reset_index = primitive_reset_index;
|
||||
}
|
||||
radeon_opt_set_context_reg(cmd_buffer, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
|
||||
RADV_TRACKED_VGT_MULTI_PRIM_IB_RESET_INDX, primitive_reset_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3879,13 +3873,7 @@ radv_flush_occlusion_query_state(struct radv_cmd_buffer *cmd_buffer)
|
|||
}
|
||||
}
|
||||
|
||||
if (db_count_control != cmd_buffer->state.last_db_count_control) {
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028004_DB_COUNT_CONTROL, db_count_control);
|
||||
|
||||
cmd_buffer->state.context_roll_without_scissor_emitted = true;
|
||||
|
||||
cmd_buffer->state.last_db_count_control = db_count_control;
|
||||
}
|
||||
radeon_opt_set_context_reg(cmd_buffer, R_028004_DB_COUNT_CONTROL, RADV_TRACKED_DB_COUNT_CONTROL, db_count_control);
|
||||
|
||||
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_OCCLUSION_QUERY;
|
||||
}
|
||||
|
@ -6073,15 +6061,12 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi
|
|||
cmd_buffer->state.last_drawid = -1;
|
||||
cmd_buffer->state.last_subpass_color_count = MAX_RTS;
|
||||
cmd_buffer->state.predication_type = -1;
|
||||
cmd_buffer->state.last_sx_ps_downconvert = -1;
|
||||
cmd_buffer->state.last_sx_blend_opt_epsilon = -1;
|
||||
cmd_buffer->state.last_sx_blend_opt_control = -1;
|
||||
cmd_buffer->state.mesh_shading = false;
|
||||
cmd_buffer->state.last_vrs_rates = -1;
|
||||
cmd_buffer->state.last_vrs_rates_sgpr_idx = -1;
|
||||
cmd_buffer->state.last_pa_sc_binner_cntl_0 = -1;
|
||||
cmd_buffer->state.last_db_count_control = -1;
|
||||
cmd_buffer->state.last_db_shader_control = -1;
|
||||
|
||||
radv_reset_tracked_regs(cmd_buffer);
|
||||
|
||||
cmd_buffer->usage_flags = pBeginInfo->flags;
|
||||
|
||||
cmd_buffer->state.dirty |=
|
||||
|
@ -8076,10 +8061,6 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
|
|||
primary->state.emitted_compute_pipeline = secondary->state.emitted_compute_pipeline;
|
||||
}
|
||||
|
||||
if (secondary->state.last_primitive_reset_index) {
|
||||
primary->state.last_primitive_reset_index = secondary->state.last_primitive_reset_index;
|
||||
}
|
||||
|
||||
if (secondary->state.last_ia_multi_vgt_param) {
|
||||
primary->state.last_ia_multi_vgt_param = secondary->state.last_ia_multi_vgt_param;
|
||||
}
|
||||
|
@ -8090,9 +8071,6 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
|
|||
|
||||
primary->state.last_num_instances = secondary->state.last_num_instances;
|
||||
primary->state.last_subpass_color_count = secondary->state.last_subpass_color_count;
|
||||
primary->state.last_sx_ps_downconvert = secondary->state.last_sx_ps_downconvert;
|
||||
primary->state.last_sx_blend_opt_epsilon = secondary->state.last_sx_blend_opt_epsilon;
|
||||
primary->state.last_sx_blend_opt_control = secondary->state.last_sx_blend_opt_control;
|
||||
|
||||
if (secondary->state.last_index_type != -1) {
|
||||
primary->state.last_index_type = secondary->state.last_index_type;
|
||||
|
@ -8101,13 +8079,17 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
|
|||
primary->state.last_vrs_rates = secondary->state.last_vrs_rates;
|
||||
primary->state.last_vrs_rates_sgpr_idx = secondary->state.last_vrs_rates_sgpr_idx;
|
||||
|
||||
primary->state.last_pa_sc_binner_cntl_0 = secondary->state.last_pa_sc_binner_cntl_0;
|
||||
|
||||
primary->state.last_db_shader_control = secondary->state.last_db_shader_control;
|
||||
|
||||
primary->state.rb_noncoherent_dirty |= secondary->state.rb_noncoherent_dirty;
|
||||
|
||||
primary->state.uses_draw_indirect |= secondary->state.uses_draw_indirect;
|
||||
|
||||
for (uint32_t reg = 0; reg < RADV_NUM_ALL_TRACKED_REGS; reg++) {
|
||||
if (!BITSET_TEST(secondary->tracked_regs.reg_saved_mask, reg))
|
||||
continue;
|
||||
|
||||
BITSET_SET(primary->tracked_regs.reg_saved_mask, reg);
|
||||
primary->tracked_regs.reg_value[reg] = secondary->tracked_regs.reg_value[reg];
|
||||
}
|
||||
}
|
||||
|
||||
/* After executing commands from secondary buffers we have to dirty
|
||||
|
@ -8123,7 +8105,6 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
|
|||
primary->state.last_first_instance = -1;
|
||||
primary->state.last_drawid = -1;
|
||||
primary->state.last_vertex_offset_valid = false;
|
||||
primary->state.last_db_count_control = -1;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -9445,11 +9426,8 @@ radv_emit_db_shader_control(struct radv_cmd_buffer *cmd_buffer)
|
|||
}
|
||||
}
|
||||
|
||||
if (db_shader_control != cmd_buffer->state.last_db_shader_control) {
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_02880C_DB_SHADER_CONTROL, db_shader_control);
|
||||
|
||||
cmd_buffer->state.last_db_shader_control = db_shader_control;
|
||||
}
|
||||
radeon_opt_set_context_reg(cmd_buffer, R_02880C_DB_SHADER_CONTROL, RADV_TRACKED_DB_SHADER_CONTROL,
|
||||
db_shader_control);
|
||||
|
||||
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_DB_SHADER_CONTROL;
|
||||
}
|
||||
|
|
|
@ -290,6 +290,27 @@ enum rgp_flush_bits {
|
|||
RGP_FLUSH_INVAL_L1 = 0x8000,
|
||||
};
|
||||
|
||||
/**
 * Identifiers of the registers whose last emitted value is cached per command
 * buffer.
 *
 * Each enumerator is used both as a bit index into
 * radv_tracked_regs::reg_saved_mask and as an index into
 * radv_tracked_regs::reg_value. RADV_NUM_ALL_TRACKED_REGS must stay last: it
 * sizes both of those arrays.
 */
enum radv_tracked_reg {
   RADV_TRACKED_DB_COUNT_CONTROL,
   RADV_TRACKED_DB_SHADER_CONTROL,

   RADV_TRACKED_PA_SC_BINNER_CNTL_0,

   /* 3 consecutive registers: radeon_opt_set_context_reg3() addresses them as
    * RADV_TRACKED_SX_PS_DOWNCONVERT + {0, 1, 2}, so they must stay adjacent
    * and in this order.
    */
   RADV_TRACKED_SX_PS_DOWNCONVERT,
   RADV_TRACKED_SX_BLEND_OPT_EPSILON,
   RADV_TRACKED_SX_BLEND_OPT_CONTROL,

   RADV_TRACKED_VGT_MULTI_PRIM_IB_RESET_INDX, /* GFX6-7 */

   RADV_NUM_ALL_TRACKED_REGS,
};

/* Fail the build, rather than silently caching the wrong slots, if someone
 * inserts an entry in the middle of the SX_PS_DOWNCONVERT trio.
 */
_Static_assert(RADV_TRACKED_SX_BLEND_OPT_EPSILON == RADV_TRACKED_SX_PS_DOWNCONVERT + 1,
               "SX_BLEND_OPT_EPSILON must directly follow SX_PS_DOWNCONVERT");
_Static_assert(RADV_TRACKED_SX_BLEND_OPT_CONTROL == RADV_TRACKED_SX_PS_DOWNCONVERT + 2,
               "SX_BLEND_OPT_CONTROL must directly follow SX_BLEND_OPT_EPSILON");
|
||||
|
||||
struct radv_tracked_regs {
|
||||
BITSET_DECLARE(reg_saved_mask, RADV_NUM_ALL_TRACKED_REGS);
|
||||
uint32_t reg_value[RADV_NUM_ALL_TRACKED_REGS];
|
||||
};
|
||||
|
||||
struct radv_cmd_state {
|
||||
/* Vertex descriptors */
|
||||
uint64_t vb_va;
|
||||
|
@ -326,7 +347,6 @@ struct radv_cmd_state {
|
|||
uint64_t index_va;
|
||||
int32_t last_index_type;
|
||||
|
||||
uint32_t last_primitive_reset_index; /* only relevant on GFX6-7 */
|
||||
enum radv_cmd_flush_bits flush_bits;
|
||||
unsigned active_occlusion_queries;
|
||||
bool perfect_occlusion_queries_enabled;
|
||||
|
@ -348,14 +368,6 @@ struct radv_cmd_state {
|
|||
uint32_t last_drawid;
|
||||
uint32_t last_subpass_color_count;
|
||||
|
||||
uint32_t last_sx_ps_downconvert;
|
||||
uint32_t last_sx_blend_opt_epsilon;
|
||||
uint32_t last_sx_blend_opt_control;
|
||||
|
||||
uint32_t last_db_count_control;
|
||||
|
||||
uint32_t last_db_shader_control;
|
||||
|
||||
/* Whether CP DMA is busy/idle. */
|
||||
bool dma_is_busy;
|
||||
|
||||
|
@ -424,9 +436,6 @@ struct radv_cmd_state {
|
|||
unsigned spi_shader_col_format;
|
||||
unsigned cb_shader_mask;
|
||||
|
||||
/* Binning state */
|
||||
unsigned last_pa_sc_binner_cntl_0;
|
||||
|
||||
struct radv_multisample_state ms;
|
||||
|
||||
/* Custom blend mode for internal operations. */
|
||||
|
@ -472,6 +481,8 @@ struct radv_cmd_buffer_upload {
|
|||
struct radv_cmd_buffer {
|
||||
struct vk_command_buffer vk;
|
||||
|
||||
struct radv_tracked_regs tracked_regs;
|
||||
|
||||
VkCommandBufferUsageFlags usage_flags;
|
||||
struct radeon_cmdbuf *cs;
|
||||
struct radv_cmd_state state;
|
||||
|
|
|
@ -193,6 +193,40 @@ radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigne
|
|||
radeon_emit(cs, 0); /* unused */
|
||||
}
|
||||
|
||||
/**
 * Emit a context register write unless the same value is already cached.
 *
 * The write is skipped only when the register's bit is set in
 * tracked_regs.reg_saved_mask AND the cached value equals \p value. Otherwise
 * the register is written through radeon_set_context_reg(), the cache entry is
 * updated and state.context_roll_without_scissor_emitted is set.
 *
 * \param cmdbuf    struct radv_cmd_buffer pointer (evaluated once).
 * \param reg       Register offset forwarded to radeon_set_context_reg().
 * \param reg_enum  enum radv_tracked_reg slot caching this register.
 * \param value     32-bit value to program (evaluated once).
 */
#define radeon_opt_set_context_reg(cmdbuf, reg, reg_enum, value)                                                       \
   do {                                                                                                                \
      struct radv_cmd_buffer *__cmdbuf = (cmdbuf);                                                                     \
      struct radv_tracked_regs *__tracked_regs = &__cmdbuf->tracked_regs;                                              \
      const uint32_t __value = (value);                                                                                \
      if (!(BITSET_TEST(__tracked_regs->reg_saved_mask, (reg_enum)) &&                                                 \
            __tracked_regs->reg_value[(reg_enum)] == __value)) {                                                       \
         radeon_set_context_reg(__cmdbuf->cs, reg, __value);                                                           \
         __tracked_regs->reg_value[(reg_enum)] = __value;                                                              \
         BITSET_SET(__tracked_regs->reg_saved_mask, (reg_enum));                                                       \
         __cmdbuf->state.context_roll_without_scissor_emitted = true;                                                  \
      }                                                                                                                \
   } while (0)
|
||||
|
||||
/**
 * Emit three consecutive context registers unless all three values are
 * already cached.
 *
 * The registers are cached in slots \p reg_enum, \p reg_enum + 1 and
 * \p reg_enum + 2, which must therefore be adjacent enum radv_tracked_reg
 * entries. If any of the three slots is not marked as saved, or any cached
 * value differs, the whole group is re-emitted with
 * radeon_set_context_reg_seq() followed by three radeon_emit() calls, the
 * cache is updated and state.context_roll_without_scissor_emitted is set.
 *
 * All accesses go through the local __cmdbuf copy, so \p cmdbuf is evaluated
 * exactly once and may be any pointer expression (e.g. `&cb` or `p++`).
 *
 * \param cmdbuf    struct radv_cmd_buffer pointer (evaluated once).
 * \param reg       Offset of the first register of the group.
 * \param reg_enum  enum radv_tracked_reg slot of the first register.
 * \param v1        Value for the first register (evaluated once).
 * \param v2        Value for the second register (evaluated once).
 * \param v3        Value for the third register (evaluated once).
 */
#define radeon_opt_set_context_reg3(cmdbuf, reg, reg_enum, v1, v2, v3)                                                 \
   do {                                                                                                                \
      struct radv_cmd_buffer *__cmdbuf = (cmdbuf);                                                                     \
      struct radv_tracked_regs *__tracked_regs = &__cmdbuf->tracked_regs;                                              \
      const uint32_t __v1 = (v1), __v2 = (v2), __v3 = (v3);                                                            \
      if (!BITSET_TEST_RANGE_INSIDE_WORD(__tracked_regs->reg_saved_mask, (reg_enum), (reg_enum) + 2, 0x7) ||           \
          __tracked_regs->reg_value[(reg_enum)] != __v1 || __tracked_regs->reg_value[(reg_enum) + 1] != __v2 ||        \
          __tracked_regs->reg_value[(reg_enum) + 2] != __v3) {                                                         \
         radeon_set_context_reg_seq(__cmdbuf->cs, reg, 3);                                                             \
         radeon_emit(__cmdbuf->cs, __v1);                                                                              \
         radeon_emit(__cmdbuf->cs, __v2);                                                                              \
         radeon_emit(__cmdbuf->cs, __v3);                                                                              \
         BITSET_SET_RANGE_INSIDE_WORD(__tracked_regs->reg_saved_mask, (reg_enum), (reg_enum) + 2);                     \
         __tracked_regs->reg_value[(reg_enum)] = __v1;                                                                 \
         __tracked_regs->reg_value[(reg_enum) + 1] = __v2;                                                             \
         __tracked_regs->reg_value[(reg_enum) + 2] = __v3;                                                             \
         __cmdbuf->state.context_roll_without_scissor_emitted = true;                                                  \
      }                                                                                                                \
   } while (0)
|
||||
|
||||
ALWAYS_INLINE static void
|
||||
radv_cp_wait_mem(struct radeon_cmdbuf *cs, const enum radv_queue_family qf, const uint32_t op, const uint64_t va,
|
||||
const uint32_t ref, const uint32_t mask)
|
||||
|
|
Loading…
Reference in New Issue