radeonsi: always use Wave32 for GS fast launch, because Wave64 hangs
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5524>
This commit is contained in:
parent 8fff9beb44
commit 9049e39804
|
@ -1880,12 +1880,14 @@ static inline bool si_compute_prim_discard_enabled(struct si_context *sctx)
|
|||
|
||||
static inline unsigned si_get_wave_size(struct si_screen *sscreen,
|
||||
enum pipe_shader_type shader_type, bool ngg, bool es,
|
||||
bool prim_discard_cs)
|
||||
bool gs_fast_launch, bool prim_discard_cs)
|
||||
{
|
||||
if (shader_type == PIPE_SHADER_COMPUTE)
|
||||
return sscreen->compute_wave_size;
|
||||
else if (shader_type == PIPE_SHADER_FRAGMENT)
|
||||
return sscreen->ps_wave_size;
|
||||
else if (gs_fast_launch)
|
||||
return 32; /* GS fast launch hangs with Wave64, so always use Wave32. */
|
||||
else if ((shader_type == PIPE_SHADER_VERTEX && prim_discard_cs) || /* only Wave64 implemented */
|
||||
(shader_type == PIPE_SHADER_VERTEX && es && !ngg) ||
|
||||
(shader_type == PIPE_SHADER_TESS_EVAL && es && !ngg) ||
|
||||
|
@ -1898,7 +1900,9 @@ static inline unsigned si_get_wave_size(struct si_screen *sscreen,
|
|||
static inline unsigned si_get_shader_wave_size(struct si_shader *shader)
|
||||
{
|
||||
return si_get_wave_size(shader->selector->screen, shader->selector->type, shader->key.as_ngg,
|
||||
shader->key.as_es, shader->key.opt.vs_as_prim_discard_cs);
|
||||
shader->key.as_es,
|
||||
shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL,
|
||||
shader->key.opt.vs_as_prim_discard_cs);
|
||||
}
|
||||
|
||||
#define PRINT_ERR(fmt, args...) \
|
||||
|
|
|
@ -1967,6 +1967,9 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader_part **list,
|
|||
shader.key.as_ls = key->vs_prolog.as_ls;
|
||||
shader.key.as_es = key->vs_prolog.as_es;
|
||||
shader.key.as_ngg = key->vs_prolog.as_ngg;
|
||||
shader.key.opt.ngg_culling =
|
||||
(key->vs_prolog.gs_fast_launch_tri_list ? SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST : 0) |
|
||||
(key->vs_prolog.gs_fast_launch_tri_strip ? SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP : 0);
|
||||
shader.key.opt.vs_as_prim_discard_cs = key->vs_prolog.as_prim_discard_cs;
|
||||
break;
|
||||
case PIPE_SHADER_TESS_CTRL:
|
||||
|
@ -1990,6 +1993,7 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader_part **list,
|
|||
struct si_shader_context ctx;
|
||||
si_llvm_context_init(&ctx, sscreen, compiler,
|
||||
si_get_wave_size(sscreen, type, shader.key.as_ngg, shader.key.as_es,
|
||||
shader.key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL,
|
||||
shader.key.opt.vs_as_prim_discard_cs));
|
||||
ctx.shader = &shader;
|
||||
ctx.type = type;
|
||||
|
|
|
@ -474,7 +474,8 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
|
|||
shader->is_gs_copy_shader = true;
|
||||
|
||||
si_llvm_context_init(&ctx, sscreen, compiler,
|
||||
si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false, false));
|
||||
si_get_wave_size(sscreen, PIPE_SHADER_VERTEX,
|
||||
false, false, false, false));
|
||||
ctx.shader = shader;
|
||||
ctx.type = PIPE_SHADER_VERTEX;
|
||||
|
||||
|
|
|
@ -69,7 +69,7 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
|
|||
shader_variant_flags |= 1 << 0;
|
||||
if (sel->nir)
|
||||
shader_variant_flags |= 1 << 1;
|
||||
if (si_get_wave_size(sel->screen, sel->type, ngg, es, false) == 32)
|
||||
if (si_get_wave_size(sel->screen, sel->type, ngg, es, false, false) == 32)
|
||||
shader_variant_flags |= 1 << 2;
|
||||
if (sel->type == PIPE_SHADER_FRAGMENT && sel->info.uses_derivatives && sel->info.uses_kill &&
|
||||
sel->screen->debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL))
|
||||
|
@ -1120,11 +1120,13 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
else
|
||||
gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
|
||||
|
||||
unsigned wave_size = si_get_shader_wave_size(shader);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
|
||||
si_pm4_set_reg(
|
||||
pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
|
||||
S_00B228_VGPRS((shader->config.num_vgprs - 1) / (sscreen->ge_wave_size == 32 ? 8 : 4)) |
|
||||
S_00B228_VGPRS((shader->config.num_vgprs - 1) / (wave_size == 32 ? 8 : 4)) |
|
||||
S_00B228_FLOAT_MODE(shader->config.float_mode) | S_00B228_DX10_CLAMP(1) |
|
||||
S_00B228_MEM_ORDERED(1) | S_00B228_WGP_MODE(1) |
|
||||
S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt));
|
||||
|
@ -3692,7 +3694,9 @@ static struct si_pm4_state *si_build_vgt_shader_config(struct si_screen *screen,
|
|||
if (screen->info.chip_class >= GFX9)
|
||||
stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
|
||||
|
||||
if (screen->info.chip_class >= GFX10 && screen->ge_wave_size == 32) {
|
||||
if (screen->info.chip_class >= GFX10 &&
|
||||
/* GS fast launch hangs with Wave64, so always use Wave32. */
|
||||
(screen->ge_wave_size == 32 || (key.u.ngg && key.u.ngg_gs_fast_launch))) {
|
||||
stages |= S_028B54_HS_W32_EN(1) |
|
||||
S_028B54_GS_W32_EN(key.u.ngg) | /* legacy GS only supports Wave64 */
|
||||
S_028B54_VS_W32_EN(1);
|
||||
|
|
Loading…
Reference in New Issue