radeonsi: remove DFSM after we discovered how bad it is

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10813>
This commit is contained in:
Marek Olšák 2021-05-20 05:27:43 -04:00 committed by Marge Bot
parent 56a450e984
commit 7844bdadac
6 changed files with 19 additions and 73 deletions

View File

@ -321,7 +321,6 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx)
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL__CL] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x00090000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_DFSM_CONTROL] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_VRS_OVERRIDE_CNTL] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ] = 0x3f800000;

View File

@ -100,9 +100,7 @@ static const struct debug_named_value radeonsi_debug_options[] = {
{"switch_on_eop", DBG(SWITCH_ON_EOP), "Program WD/IA to switch on end-of-packet."},
{"nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order rasterization"},
{"nodpbb", DBG(NO_DPBB), "Disable DPBB."},
{"nodfsm", DBG(NO_DFSM), "Disable DFSM."},
{"dpbb", DBG(DPBB), "Enable DPBB."},
{"dfsm", DBG(DFSM), "Enable DFSM."},
{"nohyperz", DBG(NO_HYPERZ), "Disable Hyper-Z"},
{"no2d", DBG(NO_2D_TILING), "Disable 2D tiling"},
{"notiling", DBG(NO_TILING), "Disable tiling"},
@ -1229,30 +1227,11 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
sscreen->allow_dcc_msaa_clear_to_reg_for_bpp[bpp_log2] = true;
}
/* Only enable primitive binning on APUs by default. */
if (sscreen->info.chip_class >= GFX10) {
sscreen->dpbb_allowed = true;
/* DFSM is not supported on GFX 10.3 and not beneficial on Navi1x. */
} else if (sscreen->info.chip_class == GFX9) {
sscreen->dpbb_allowed = !sscreen->info.has_dedicated_vram;
/* DFSM reduces the Raven2 draw prim rate by ~43%. Disable it. */
sscreen->dfsm_allowed = false;
}
/* Process DPBB enable flags. */
if (sscreen->debug_flags & DBG(DPBB)) {
sscreen->dpbb_allowed = true;
if (sscreen->debug_flags & DBG(DFSM))
sscreen->dfsm_allowed = true;
}
/* Process DPBB disable flags. */
if (sscreen->debug_flags & DBG(NO_DPBB)) {
sscreen->dpbb_allowed = false;
sscreen->dfsm_allowed = false;
} else if (sscreen->debug_flags & DBG(NO_DFSM)) {
sscreen->dfsm_allowed = false;
}
sscreen->dpbb_allowed = !(sscreen->debug_flags & DBG(NO_DPBB)) &&
(sscreen->info.chip_class >= GFX10 ||
/* Only enable primitive binning on gfx9 APUs by default. */
(sscreen->info.chip_class == GFX9 && !sscreen->info.has_dedicated_vram) ||
sscreen->debug_flags & DBG(DPBB));
if (sscreen->dpbb_allowed) {
if (sscreen->info.has_dedicated_vram) {

View File

@ -229,9 +229,7 @@ enum
DBG_SWITCH_ON_EOP,
DBG_NO_OUT_OF_ORDER,
DBG_NO_DPBB,
DBG_NO_DFSM,
DBG_DPBB,
DBG_DFSM,
DBG_NO_HYPERZ,
DBG_NO_2D_TILING,
DBG_NO_TILING,
@ -547,7 +545,6 @@ struct si_screen {
bool assume_no_z_fights;
bool commutative_blend_add;
bool dpbb_allowed;
bool dfsm_allowed;
bool use_ngg;
bool use_ngg_culling;
bool use_ngg_streamout;

View File

@ -3281,7 +3281,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
if (sctx->screen->dfsm_allowed) {
if (sctx->screen->dpbb_allowed) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
}
@ -3540,17 +3540,7 @@ static void si_emit_msaa_config(struct si_context *sctx)
/* R_028A4C_PA_SC_MODE_CNTL_1 */
radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1,
sc_mode_cntl_1);
if (radeon_packets_added()) {
sctx->context_roll = true;
/* GFX9: Flush DFSM when the AA mode changes. */
if (sctx->screen->dfsm_allowed) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
}
}
radeon_end();
radeon_end_update_context_roll(sctx);
}
void si_update_ps_iter_samples(struct si_context *sctx)
@ -5207,6 +5197,12 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
S_028034_BR_X(16384) | S_028034_BR_Y(16384));
}
if (sctx->chip_class >= GFX10) {
si_pm4_set_reg(pm4, R_028038_DB_DFSM_CONTROL,
S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF) |
S_028038_POPS_DRAIN_PS_ON_OVERLAP(1));
}
unsigned cu_mask_ps = 0xffffffff;
/* It's wasteful to enable all CUs for PS if shader arrays have a different
@ -5337,6 +5333,10 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0);
si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0);
si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0);
si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL,
S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) |
S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
}
if (sctx->chip_class >= GFX9) {

View File

@ -290,7 +290,7 @@ enum si_tracked_reg
SI_TRACKED_PA_CL_CLIP_CNTL,
SI_TRACKED_PA_SC_BINNER_CNTL_0,
SI_TRACKED_DB_DFSM_CONTROL,
SI_TRACKED_DB_VRS_OVERRIDE_CNTL,
SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, /* 4 consecutive registers */

View File

@ -435,12 +435,6 @@ static void si_emit_dpbb_disable(struct si_context *sctx)
sctx->family >= CHIP_RAVEN2) &&
sctx->last_binning_enabled != 0));
}
unsigned db_dfsm_control =
sctx->chip_class >= GFX10 ? R_028038_DB_DFSM_CONTROL : R_028060_DB_DFSM_CONTROL;
radeon_opt_set_context_reg(
sctx, db_dfsm_control, SI_TRACKED_DB_DFSM_CONTROL,
S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) | S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
radeon_end_update_context_roll(sctx);
sctx->last_binning_enabled = false;
@ -498,23 +492,6 @@ void si_emit_dpbb_state(struct si_context *sctx)
return;
}
/* Enable DFSM if it's preferred. */
unsigned punchout_mode = V_028060_FORCE_OFF;
bool disable_start_of_prim = true;
bool zs_eqaa_dfsm_bug =
sctx->chip_class == GFX9 && sctx->framebuffer.state.zsbuf &&
sctx->framebuffer.nr_samples != MAX2(1, sctx->framebuffer.state.zsbuf->texture->nr_samples);
if (sscreen->dfsm_allowed && !zs_eqaa_dfsm_bug && cb_target_enabled_4bit &&
!G_02880C_KILL_ENABLE(db_shader_control) &&
/* These two also imply that DFSM is disabled when PS writes to memory. */
!G_02880C_EXEC_ON_HIER_FAIL(db_shader_control) &&
!G_02880C_EXEC_ON_NOOP(db_shader_control) &&
G_02880C_Z_ORDER(db_shader_control) == V_02880C_EARLY_Z_THEN_LATE_Z) {
punchout_mode = V_028060_AUTO;
disable_start_of_prim = (cb_target_enabled_4bit & blend->blend_enable_4bit) != 0;
}
/* Tunable parameters. */
unsigned fpovs_per_batch = 63; /* allowed range: [0, 255], 0 = unlimited */
@ -533,18 +510,12 @@ void si_emit_dpbb_state(struct si_context *sctx)
S_028C44_BIN_SIZE_Y_EXTEND(bin_size_extend.y) |
S_028C44_CONTEXT_STATES_PER_BIN(sscreen->pbb_context_states_per_bin - 1) |
S_028C44_PERSISTENT_STATES_PER_BIN(sscreen->pbb_persistent_states_per_bin - 1) |
S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
S_028C44_DISABLE_START_OF_PRIM(1) |
S_028C44_FPOVS_PER_BATCH(fpovs_per_batch) | S_028C44_OPTIMAL_BIN_SELECTION(1) |
S_028C44_FLUSH_ON_BINNING_TRANSITION((sctx->family == CHIP_VEGA12 ||
sctx->family == CHIP_VEGA20 ||
sctx->family >= CHIP_RAVEN2) &&
sctx->last_binning_enabled != 1));
unsigned db_dfsm_control =
sctx->chip_class >= GFX10 ? R_028038_DB_DFSM_CONTROL : R_028060_DB_DFSM_CONTROL;
radeon_opt_set_context_reg(
sctx, db_dfsm_control, SI_TRACKED_DB_DFSM_CONTROL,
S_028060_PUNCHOUT_MODE(punchout_mode) | S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
radeon_end_update_context_roll(sctx);
sctx->last_binning_enabled = true;