radeonsi: move PSIZE and CLIPDIST unique IO indices after GENERIC
Heaven LDS usage for LS+HS is below. The masks are "outputs_written" for LS and HS. Note that 32K is the maximum size. Before: heaven_x64: ls=1f1 tcs=1f1, lds=32K heaven_x64: ls=31 tcs=31, lds=24K heaven_x64: ls=71 tcs=71, lds=28K After: heaven_x64: ls=3f tcs=3f, lds=24K heaven_x64: ls=7 tcs=7, lds=13K heaven_x64: ls=f tcs=f, lds=17K All other apps have a similar decrease in LDS usage, because the "outputs_written" masks are similar. Also, most apps don't write POSITION in these shader stages, so there is room for improvement. (tight per-component input/output packing might help even more) It's unknown whether this improves performance. Tested-by: Edmondo Tommasina <edmondo.tommasina@gmail.com> Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de> Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
2c4ec3f93f
commit
2b8b9a56ef
|
@ -136,18 +136,22 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
|
|||
switch (semantic_name) {
|
||||
case TGSI_SEMANTIC_POSITION:
|
||||
return 0;
|
||||
case TGSI_SEMANTIC_PSIZE:
|
||||
return 1;
|
||||
case TGSI_SEMANTIC_CLIPDIST:
|
||||
assert(index <= 1);
|
||||
return 2 + index;
|
||||
case TGSI_SEMANTIC_GENERIC:
|
||||
/* Since some shader stages use the highest used IO index
|
||||
* to determine the size to allocate for inputs/outputs
|
||||
* (in LDS, tess and GS rings), GENERIC should be placed right
|
||||
* after POSITION to make that size as small as possible.
|
||||
*/
|
||||
if (index < SI_MAX_IO_GENERIC)
|
||||
return 4 + index;
|
||||
return 1 + index;
|
||||
|
||||
assert(!"invalid generic index");
|
||||
return 0;
|
||||
|
||||
case TGSI_SEMANTIC_PSIZE:
|
||||
return SI_MAX_IO_GENERIC + 1;
|
||||
case TGSI_SEMANTIC_CLIPDIST:
|
||||
assert(index <= 1);
|
||||
return SI_MAX_IO_GENERIC + 2 + index;
|
||||
case TGSI_SEMANTIC_FOG:
|
||||
return SI_MAX_IO_GENERIC + 4;
|
||||
case TGSI_SEMANTIC_LAYER:
|
||||
|
|
|
@ -1233,7 +1233,9 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx,
|
|||
uint64_t outputs_written = vs->outputs_written;
|
||||
uint64_t inputs_read = 0;
|
||||
|
||||
outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
|
||||
/* ignore POSITION, PSIZE */
|
||||
outputs_written &= ~((1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_POSITION, 0) |
|
||||
(1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_PSIZE, 0))));
|
||||
|
||||
if (!ps_disabled) {
|
||||
inputs_read = ps->inputs_read;
|
||||
|
|
Loading…
Reference in New Issue