diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index a925b429eba..0aa6b9992db 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -321,7 +321,6 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx) ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL__CL] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x00090000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003; - ctx->tracked_regs.reg_value[SI_TRACKED_DB_DFSM_CONTROL] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_DB_VRS_OVERRIDE_CNTL] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ] = 0x3f800000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ] = 0x3f800000; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 8d616317f0c..6845b0504c0 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -100,9 +100,7 @@ static const struct debug_named_value radeonsi_debug_options[] = { {"switch_on_eop", DBG(SWITCH_ON_EOP), "Program WD/IA to switch on end-of-packet."}, {"nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order rasterization"}, {"nodpbb", DBG(NO_DPBB), "Disable DPBB."}, - {"nodfsm", DBG(NO_DFSM), "Disable DFSM."}, {"dpbb", DBG(DPBB), "Enable DPBB."}, - {"dfsm", DBG(DFSM), "Enable DFSM."}, {"nohyperz", DBG(NO_HYPERZ), "Disable Hyper-Z"}, {"no2d", DBG(NO_2D_TILING), "Disable 2D tiling"}, {"notiling", DBG(NO_TILING), "Disable tiling"}, @@ -1229,30 +1227,11 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, sscreen->allow_dcc_msaa_clear_to_reg_for_bpp[bpp_log2] = true; } - /* Only enable primitive binning on APUs by default. */ - if (sscreen->info.chip_class >= GFX10) { - sscreen->dpbb_allowed = true; - /* DFSM is not supported on GFX 10.3 and not beneficial on Navi1x. */ - } else if (sscreen->info.chip_class == GFX9) { - sscreen->dpbb_allowed = !sscreen->info.has_dedicated_vram; - /* DFSM reduces the Raven2 draw prim rate by ~43%. Disable it. */ - sscreen->dfsm_allowed = false; - } - - /* Process DPBB enable flags. */ - if (sscreen->debug_flags & DBG(DPBB)) { - sscreen->dpbb_allowed = true; - if (sscreen->debug_flags & DBG(DFSM)) - sscreen->dfsm_allowed = true; - } - - /* Process DPBB disable flags. */ - if (sscreen->debug_flags & DBG(NO_DPBB)) { - sscreen->dpbb_allowed = false; - sscreen->dfsm_allowed = false; - } else if (sscreen->debug_flags & DBG(NO_DFSM)) { - sscreen->dfsm_allowed = false; - } + sscreen->dpbb_allowed = !(sscreen->debug_flags & DBG(NO_DPBB)) && + (sscreen->info.chip_class >= GFX10 || + /* Only enable primitive binning on gfx9 APUs by default. */ + (sscreen->info.chip_class == GFX9 && !sscreen->info.has_dedicated_vram) || + sscreen->debug_flags & DBG(DPBB)); if (sscreen->dpbb_allowed) { if (sscreen->info.has_dedicated_vram) { diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 33b0b168a36..8bf45fb2bf4 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -229,9 +229,7 @@ enum DBG_SWITCH_ON_EOP, DBG_NO_OUT_OF_ORDER, DBG_NO_DPBB, - DBG_NO_DFSM, DBG_DPBB, - DBG_DFSM, DBG_NO_HYPERZ, DBG_NO_2D_TILING, DBG_NO_TILING, @@ -547,7 +545,6 @@ struct si_screen { bool assume_no_z_fights; bool commutative_blend_add; bool dpbb_allowed; - bool dfsm_allowed; bool use_ngg; bool use_ngg_culling; bool use_ngg_streamout; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index b122609ca7c..8cb5347d2f7 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3281,7 +3281,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); - if (sctx->screen->dfsm_allowed) { + if (sctx->screen->dpbb_allowed) { radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); } @@ -3540,17 +3540,7 @@ static void si_emit_msaa_config(struct si_context *sctx) /* R_028A4C_PA_SC_MODE_CNTL_1 */ radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1, sc_mode_cntl_1); - - if (radeon_packets_added()) { - sctx->context_roll = true; - - /* GFX9: Flush DFSM when the AA mode changes. */ - if (sctx->screen->dfsm_allowed) { - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); - } - } - radeon_end(); + radeon_end_update_context_roll(sctx); } void si_update_ps_iter_samples(struct si_context *sctx) @@ -5207,6 +5197,12 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) S_028034_BR_X(16384) | S_028034_BR_Y(16384)); } + if (sctx->chip_class >= GFX10) { + si_pm4_set_reg(pm4, R_028038_DB_DFSM_CONTROL, + S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF) | + S_028038_POPS_DRAIN_PS_ON_OVERLAP(1)); + } + unsigned cu_mask_ps = 0xffffffff; /* It's wasteful to enable all CUs for PS if shader arrays have a different @@ -5337,6 +5333,10 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0); si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0); si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0); + + si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL, + S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) | + S_028060_POPS_DRAIN_PS_ON_OVERLAP(1)); } if (sctx->chip_class >= GFX9) { diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index ea31a2afd2a..fc7ba98800b 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -290,7 +290,7 @@ enum si_tracked_reg SI_TRACKED_PA_CL_CLIP_CNTL, SI_TRACKED_PA_SC_BINNER_CNTL_0, - SI_TRACKED_DB_DFSM_CONTROL, + SI_TRACKED_DB_VRS_OVERRIDE_CNTL, SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, /* 4 consecutive registers */ diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c index 96354eff79f..20ae1fabc5f 100644 --- a/src/gallium/drivers/radeonsi/si_state_binning.c +++ b/src/gallium/drivers/radeonsi/si_state_binning.c @@ -435,12 +435,6 @@ static void si_emit_dpbb_disable(struct si_context *sctx) sctx->family >= CHIP_RAVEN2) && sctx->last_binning_enabled != 0)); } - - unsigned db_dfsm_control = - sctx->chip_class >= GFX10 ? R_028038_DB_DFSM_CONTROL : R_028060_DB_DFSM_CONTROL; - radeon_opt_set_context_reg( - sctx, db_dfsm_control, SI_TRACKED_DB_DFSM_CONTROL, - S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) | S_028060_POPS_DRAIN_PS_ON_OVERLAP(1)); radeon_end_update_context_roll(sctx); sctx->last_binning_enabled = false; @@ -498,23 +492,6 @@ void si_emit_dpbb_state(struct si_context *sctx) return; } - /* Enable DFSM if it's preferred. */ - unsigned punchout_mode = V_028060_FORCE_OFF; - bool disable_start_of_prim = true; - bool zs_eqaa_dfsm_bug = - sctx->chip_class == GFX9 && sctx->framebuffer.state.zsbuf && - sctx->framebuffer.nr_samples != MAX2(1, sctx->framebuffer.state.zsbuf->texture->nr_samples); - - if (sscreen->dfsm_allowed && !zs_eqaa_dfsm_bug && cb_target_enabled_4bit && - !G_02880C_KILL_ENABLE(db_shader_control) && - /* These two also imply that DFSM is disabled when PS writes to memory. */ - !G_02880C_EXEC_ON_HIER_FAIL(db_shader_control) && - !G_02880C_EXEC_ON_NOOP(db_shader_control) && - G_02880C_Z_ORDER(db_shader_control) == V_02880C_EARLY_Z_THEN_LATE_Z) { - punchout_mode = V_028060_AUTO; - disable_start_of_prim = (cb_target_enabled_4bit & blend->blend_enable_4bit) != 0; - } - /* Tunable parameters. */ unsigned fpovs_per_batch = 63; /* allowed range: [0, 255], 0 = unlimited */ @@ -533,18 +510,12 @@ void si_emit_dpbb_state(struct si_context *sctx) S_028C44_BIN_SIZE_Y_EXTEND(bin_size_extend.y) | S_028C44_CONTEXT_STATES_PER_BIN(sscreen->pbb_context_states_per_bin - 1) | S_028C44_PERSISTENT_STATES_PER_BIN(sscreen->pbb_persistent_states_per_bin - 1) | - S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) | + S_028C44_DISABLE_START_OF_PRIM(1) | S_028C44_FPOVS_PER_BATCH(fpovs_per_batch) | S_028C44_OPTIMAL_BIN_SELECTION(1) | S_028C44_FLUSH_ON_BINNING_TRANSITION((sctx->family == CHIP_VEGA12 || sctx->family == CHIP_VEGA20 || sctx->family >= CHIP_RAVEN2) && sctx->last_binning_enabled != 1)); - - unsigned db_dfsm_control = - sctx->chip_class >= GFX10 ? R_028038_DB_DFSM_CONTROL : R_028060_DB_DFSM_CONTROL; - radeon_opt_set_context_reg( - sctx, db_dfsm_control, SI_TRACKED_DB_DFSM_CONTROL, - S_028060_PUNCHOUT_MODE(punchout_mode) | S_028060_POPS_DRAIN_PS_ON_OVERLAP(1)); radeon_end_update_context_roll(sctx); sctx->last_binning_enabled = true;