mirror of https://gitlab.freedesktop.org/mesa/mesa
ac: allow to use 64K of LDS for tessellation on GFX9+
This is the hardware limit and it's supposed to be working. GFX7-8 also
support 64KiB but Stoney used to hang in the past and using 32KiB was
the only known solution.

fossils-db (NAVI21):
Totals from 326 (0.41% of 79395) affected shaders:
MaxWaves: 6352 -> 6378 (+0.41%); split: +0.50%, -0.09%
Instrs: 232575 -> 232827 (+0.11%); split: -0.04%, +0.15%
CodeSize: 1256940 -> 1258744 (+0.14%); split: -0.04%, +0.18%
VGPRs: 17552 -> 17384 (-0.96%); split: -1.09%, +0.14%
LDS: 2828800 -> 3899392 (+37.85%)
Latency: 2937650 -> 2934667 (-0.10%); split: -0.30%, +0.20%
InvThroughput: 704214 -> 700854 (-0.48%); split: -0.51%, +0.04%
VClause: 4398 -> 4442 (+1.00%); split: -0.20%, +1.21%
SClause: 5297 -> 5292 (-0.09%); split: -0.32%, +0.23%
Copies: 14892 -> 14921 (+0.19%); split: -0.44%, +0.63%
PreVGPRs: 13294 -> 13293 (-0.01%); split: -0.06%, +0.05%
VALU: 156536 -> 156793 (+0.16%); split: -0.03%, +0.20%
SALU: 21806 -> 21795 (-0.05%)

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28015>
This commit is contained in:
parent
fb323ae46b
commit
2e79234f9d
|
@ -1079,13 +1079,10 @@ uint32_t ac_compute_num_tess_patches(const struct radeon_info *info, uint32_t nu
|
|||
|
||||
/* Make sure that the data fits in LDS. This assumes the shaders only
|
||||
* use LDS for the inputs and outputs.
|
||||
*
|
||||
* The maximum allowed LDS size is 32K. Higher numbers can hang.
|
||||
* Use 16K as the maximum, so that we can fit 2 workgroups on the same CU.
|
||||
*/
|
||||
if (lds_per_patch) {
|
||||
ASSERTED const unsigned max_lds_size = 32 * 1024; /* hw limit */
|
||||
const unsigned target_lds_size = 16 * 1024; /* target at least 2 workgroups per CU, 16K each */
|
||||
ASSERTED const unsigned max_lds_size = info->gfx_level >= GFX9 ? 64 * 1024 : 32 * 1024; /* hw limit */
|
||||
const unsigned target_lds_size = max_lds_size / 2; /* target at least 2 workgroups per CU */
|
||||
num_patches = MIN2(num_patches, target_lds_size / lds_per_patch);
|
||||
assert(num_patches * lds_per_patch <= max_lds_size);
|
||||
}
|
||||
|
@ -1116,7 +1113,7 @@ ac_compute_tess_lds_size(const struct radeon_info *info, uint32_t lds_per_patch,
|
|||
{
|
||||
const unsigned lds_size = lds_per_patch * num_patches;
|
||||
|
||||
assert(lds_size <= (info->gfx_level >= GFX7 ? 65536 : 32768));
|
||||
assert(lds_size <= (info->gfx_level >= GFX9 ? 65536 : 32768));
|
||||
|
||||
return align(lds_size, info->lds_encode_granularity) / info->lds_encode_granularity;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue