From 0ab7ab40c8765a2d10df510ce17d43a87eec8f06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 21 Oct 2020 12:34:51 -0400 Subject: [PATCH] radeonsi: tweak triangle list culling performance for GS fast launch Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 9ccde8fe80b..1cad17195dd 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -1941,7 +1941,10 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader) unsigned max_esverts_base = 128; if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST) { - max_gsprims_base = 128 / 3; + /* Exactly 1 wave32 executes culling in primitive threads (there is no + * divergence), other waves are idle. + */ + max_gsprims_base = 32; max_esverts_base = max_gsprims_base * 3; } else if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP) { max_gsprims_base = 126;