From c5a6ecf61a766ccb3e535df3580ffd07304e32ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Tue, 30 Jul 2019 21:29:29 -0400
Subject: [PATCH] radeonsi/gfx10: implement a bug workaround for GE_PC_ALLOC

Reviewed-by: Samuel Pitoiset
Acked-by: Pierre-Eric Pelloux-Prayer
---
 src/gallium/drivers/radeonsi/si_state.c         | 12 ++++++++++++
 src/gallium/drivers/radeonsi/si_state_shaders.c | 11 -----------
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 93fc272a476..64ce81f989c 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -5619,6 +5619,18 @@ static void si_init_config(struct si_context *sctx)
 			       S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
 		si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
 
+		if (sctx->family == CHIP_NAVI10 ||
+		    sctx->family == CHIP_NAVI12 ||
+		    sctx->family == CHIP_NAVI14) {
+			/* SQ_NON_EVENT must be emitted before GE_PC_ALLOC is written. */
+			si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE);
+			si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_SQ_NON_EVENT) | EVENT_INDEX(0));
+			si_pm4_cmd_end(pm4, false);
+		}
+		/* TODO: For culling, replace 128 with 256. */
+		si_pm4_set_reg(pm4, R_030980_GE_PC_ALLOC,
+			       S_030980_OVERSUB_EN(1) |
+			       S_030980_NUM_PC_LINES(128 * sscreen->info.max_se - 1));
 	}
 
 	if (sctx->chip_class >= GFX8) {
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index a0bb0a458d6..e3bb2760e08 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1053,14 +1053,6 @@ static void gfx10_emit_shader_ngg_tess_gs(struct si_context *sctx)
 	gfx10_emit_shader_ngg_tail(sctx, shader, initial_cdw);
 }
 
-static void si_set_ge_pc_alloc(struct si_screen *sscreen,
-			       struct si_pm4_state *pm4, bool culling)
-{
-	si_pm4_set_reg(pm4, R_030980_GE_PC_ALLOC,
-		       S_030980_OVERSUB_EN(1) |
-		       S_030980_NUM_PC_LINES((culling ? 256 : 128) * sscreen->info.max_se - 1));
-}
-
 unsigned si_get_input_prim(const struct si_shader_selector *gs)
 {
 	if (gs->type == PIPE_SHADER_GEOMETRY)
@@ -1167,7 +1159,6 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
 		       S_00B22C_USER_SGPR_MSB_GFX10(num_user_sgprs >> 5) |
 		       S_00B22C_OC_LDS_EN(es_type == PIPE_SHADER_TESS_EVAL) |
 		       S_00B22C_LDS_SIZE(shader->config.lds_size));
-	si_set_ge_pc_alloc(sscreen, pm4, false);
 
 	nparams = MAX2(shader->info.nr_param_exports, 1);
 	shader->ctx_reg.ngg.spi_vs_out_config =
@@ -1419,8 +1410,6 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
 
 	si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
 	si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, S_00B124_MEM_BASE(va >> 40));
-	if (sscreen->info.chip_class >= GFX10)
-		si_set_ge_pc_alloc(sscreen, pm4, false);
 
 	uint32_t rsrc1 = S_00B128_VGPRS((shader->config.num_vgprs - 1) /
 					 (sscreen->ge_wave_size == 32 ? 8 : 4)) |