radeonsi/gfx10: fix unnecessary LDS overallocation for NGG GS

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Marek Olšák 2019-07-05 22:12:36 -04:00
parent 985a59e0d1
commit 37db9d2865
2 changed files with 2 additions and 8 deletions

View File

@@ -1279,17 +1279,11 @@ void gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
/* We can't allow using the whole LDS, because GS waves compete with
* other shader stages for LDS space.
*
-* Streamout can increase the ESGS buffer size later on, so be more
-* conservative with streamout and use 4K dwords. This may be suboptimal.
-*
-* Otherwise, use the limit of 7K dwords. The reason is that we need
-* to leave some headroom for the max_esverts increase at the end.
-*
* TODO: We should really take the shader's internal LDS use into
* account. The linker will fail if the size is greater than
* 8K dwords.
*/
-const unsigned max_lds_size = (gs_sel->so.num_outputs ? 4 : 7) * 1024 - 128;
+const unsigned max_lds_size = 8 * 1024 - 768;
const unsigned target_lds_size = max_lds_size;
unsigned esvert_lds_size = 0;
unsigned gsprim_lds_size = 0;

View File

@@ -5242,7 +5242,7 @@ static bool si_shader_binary_open(struct si_screen *screen,
}
if (sel && shader->key.as_ngg) {
-if (sel->so.num_outputs) {
+if (sel->type != PIPE_SHADER_GEOMETRY && sel->so.num_outputs) {
unsigned esgs_vertex_bytes = 4 * (4 * sel->info.num_outputs + 1);
esgs_ring_size = MAX2(esgs_ring_size,
shader->ngg.max_out_verts * esgs_vertex_bytes);