radeonsi: enable late VS allocation (v3)
v2: take the number of CUs into account
v3: change in LS allocation

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
97648229e4
commit
5944f3d2fc
|
@ -3729,12 +3729,32 @@ static void si_init_config(struct si_context *sctx)
|
||||||
si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
|
si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
|
||||||
|
|
||||||
if (sctx->b.chip_class >= CIK) {
|
if (sctx->b.chip_class >= CIK) {
|
||||||
si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
|
|
||||||
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
|
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
|
||||||
si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
|
si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
|
||||||
si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
|
si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
|
||||||
si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
|
|
||||||
si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0));
|
if (sscreen->b.info.num_good_compute_units /
|
||||||
|
(sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) {
|
||||||
|
/* Too few available compute units per SH. Disallowing
|
||||||
|
* VS to run on CU0 could hurt us more than late VS
|
||||||
|
* allocation would help.
|
||||||
|
*
|
||||||
|
* LATE_ALLOC_VS = 2 is the highest safe number.
|
||||||
|
*/
|
||||||
|
si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
|
||||||
|
si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
|
||||||
|
si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
|
||||||
|
} else {
|
||||||
|
/* Set LATE_ALLOC_VS == 31. It should be less than
|
||||||
|
* the number of scratch waves. Limitations:
|
||||||
|
* - VS can't execute on CU0.
|
||||||
|
* - If HS writes outputs to LDS, LS can't execute on CU0.
|
||||||
|
*/
|
||||||
|
si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe));
|
||||||
|
si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
|
||||||
|
si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
|
||||||
|
}
|
||||||
|
|
||||||
si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
|
si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue