From f777d00a756d72cc01571ca94efa8afa3784fd4e Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 11 Nov 2020 09:20:52 +0100 Subject: [PATCH] radv: don't count unusable vertices to the NGG LDS size Ported from RadeonSI. This is needed to get optimal LDS usage since the previous change. Cc: 20.2 Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/amd/vulkan/radv_pipeline.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index ff2b46b5079..51ed322cdd0 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -2078,10 +2078,17 @@ gfx10_get_ngg_info(const struct radv_pipeline_key *key, max_gsprims = align(max_gsprims, wavesize); max_gsprims = MIN2(max_gsprims, max_gsprims_base); - if (gsprim_lds_size) - max_gsprims = MIN2(max_gsprims, - (max_lds_size - max_esverts * esvert_lds_size) / - gsprim_lds_size); + if (gsprim_lds_size) { + /* Don't count unusable vertices to the LDS + * size. Those are vertices above the maximum + * number of vertices that can occur in the + * workgroup, which is e.g. max_gsprims * 3 + * for triangles. + */ + unsigned usable_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim); + max_gsprims = + MIN2(max_gsprims, (max_lds_size - usable_esverts * esvert_lds_size) / gsprim_lds_size); + } clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency); assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1); @@ -2119,7 +2126,10 @@ gfx10_get_ngg_info(const struct radv_pipeline_key *key, ngg->prim_amp_factor = prim_amp_factor; ngg->max_vert_out_per_gs_instance = max_vert_out_per_gs_instance; ngg->ngg_emit_size = max_gsprims * gsprim_lds_size; - ngg->esgs_ring_size = 4 * max_esverts * esvert_lds_size; + + /* Don't count unusable vertices. 
*/ + ngg->esgs_ring_size = + MIN2(max_esverts, max_gsprims * max_verts_per_prim) * esvert_lds_size * 4; if (gs_type == MESA_SHADER_GEOMETRY) { ngg->vgt_esgs_ring_itemsize = es_info->esgs_itemsize / 4;