radeonsi: always use Wave32 for GS fast launch, because Wave64 hangs

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5524>
This commit is contained in:
Marek Olšák 2020-06-17 11:45:16 -04:00 committed by Marge Bot
parent 8fff9beb44
commit 9049e39804
4 changed files with 19 additions and 6 deletions

View File

@ -1880,12 +1880,14 @@ static inline bool si_compute_prim_discard_enabled(struct si_context *sctx)
static inline unsigned si_get_wave_size(struct si_screen *sscreen,
enum pipe_shader_type shader_type, bool ngg, bool es,
bool prim_discard_cs)
bool gs_fast_launch, bool prim_discard_cs)
{
if (shader_type == PIPE_SHADER_COMPUTE)
return sscreen->compute_wave_size;
else if (shader_type == PIPE_SHADER_FRAGMENT)
return sscreen->ps_wave_size;
else if (gs_fast_launch)
return 32; /* GS fast launch hangs with Wave64, so always use Wave32. */
else if ((shader_type == PIPE_SHADER_VERTEX && prim_discard_cs) || /* only Wave64 implemented */
(shader_type == PIPE_SHADER_VERTEX && es && !ngg) ||
(shader_type == PIPE_SHADER_TESS_EVAL && es && !ngg) ||
@ -1898,7 +1900,9 @@ static inline unsigned si_get_wave_size(struct si_screen *sscreen,
/* Return the wave size for a shader variant.
 *
 * Forwards the selector's screen and shader type, together with the variant
 * key fields as_ngg, as_es, the GS fast launch bits of opt.ngg_culling and
 * opt.vs_as_prim_discard_cs, to si_get_wave_size() and returns its result.
 *
 * NOTE(review): this text is a rendered diff without +/- markers, so the
 * argument list below shows the pre-change line followed by its replacement.
 */
static inline unsigned si_get_shader_wave_size(struct si_shader *shader)
{
return si_get_wave_size(shader->selector->screen, shader->selector->type, shader->key.as_ngg,
shader->key.as_es, shader->key.opt.vs_as_prim_discard_cs);
shader->key.as_es,
/* Passed as the gs_fast_launch argument; presumably the _ALL mask covers the
 * TRI_LIST and TRI_STRIP fast launch bits - confirm against its definition. */
shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL,
shader->key.opt.vs_as_prim_discard_cs);
}
#define PRINT_ERR(fmt, args...) \

View File

@ -1967,6 +1967,9 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader_part **list,
shader.key.as_ls = key->vs_prolog.as_ls;
shader.key.as_es = key->vs_prolog.as_es;
shader.key.as_ngg = key->vs_prolog.as_ngg;
shader.key.opt.ngg_culling =
(key->vs_prolog.gs_fast_launch_tri_list ? SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST : 0) |
(key->vs_prolog.gs_fast_launch_tri_strip ? SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP : 0);
shader.key.opt.vs_as_prim_discard_cs = key->vs_prolog.as_prim_discard_cs;
break;
case PIPE_SHADER_TESS_CTRL:
@ -1990,6 +1993,7 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader_part **list,
struct si_shader_context ctx;
si_llvm_context_init(&ctx, sscreen, compiler,
si_get_wave_size(sscreen, type, shader.key.as_ngg, shader.key.as_es,
shader.key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL,
shader.key.opt.vs_as_prim_discard_cs));
ctx.shader = &shader;
ctx.type = type;

View File

@ -474,7 +474,8 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
shader->is_gs_copy_shader = true;
si_llvm_context_init(&ctx, sscreen, compiler,
si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false, false));
si_get_wave_size(sscreen, PIPE_SHADER_VERTEX,
false, false, false, false));
ctx.shader = shader;
ctx.type = PIPE_SHADER_VERTEX;

View File

@ -69,7 +69,7 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
shader_variant_flags |= 1 << 0;
if (sel->nir)
shader_variant_flags |= 1 << 1;
if (si_get_wave_size(sel->screen, sel->type, ngg, es, false) == 32)
if (si_get_wave_size(sel->screen, sel->type, ngg, es, false, false) == 32)
shader_variant_flags |= 1 << 2;
if (sel->type == PIPE_SHADER_FRAGMENT && sel->info.uses_derivatives && sel->info.uses_kill &&
sel->screen->debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL))
@ -1120,11 +1120,13 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
else
gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
unsigned wave_size = si_get_shader_wave_size(shader);
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
si_pm4_set_reg(
pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
S_00B228_VGPRS((shader->config.num_vgprs - 1) / (sscreen->ge_wave_size == 32 ? 8 : 4)) |
S_00B228_VGPRS((shader->config.num_vgprs - 1) / (wave_size == 32 ? 8 : 4)) |
S_00B228_FLOAT_MODE(shader->config.float_mode) | S_00B228_DX10_CLAMP(1) |
S_00B228_MEM_ORDERED(1) | S_00B228_WGP_MODE(1) |
S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt));
@ -3692,7 +3694,9 @@ static struct si_pm4_state *si_build_vgt_shader_config(struct si_screen *screen,
if (screen->info.chip_class >= GFX9)
stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
if (screen->info.chip_class >= GFX10 && screen->ge_wave_size == 32) {
if (screen->info.chip_class >= GFX10 &&
/* GS fast launch hangs with Wave64, so always use Wave32. */
(screen->ge_wave_size == 32 || (key.u.ngg && key.u.ngg_gs_fast_launch))) {
stages |= S_028B54_HS_W32_EN(1) |
S_028B54_GS_W32_EN(key.u.ngg) | /* legacy GS only supports Wave64 */
S_028B54_VS_W32_EN(1);