radv: remove color exports in presence of holes

If there is holes, eg. if only MRT0 and MRT2 are exported, we have to
set MRT1 to SPI_SHADER_32_R to avoid a GPU hang but the export can
still be removed from the fragment shader.

fossils-db (Sienna Cichlid):
Totals from 565 (0.42% of 134913) affected shaders:
VGPRs: 13328 -> 11456 (-14.05%)
CodeSize: 613232 -> 548224 (-10.60%); split: -11.13%, +0.53%
LDS: 284672 -> 296960 (+4.32%)
MaxWaves: 17624 -> 17684 (+0.34%)
Instrs: 113056 -> 100445 (-11.15%); split: -11.68%, +0.53%
Latency: 684327 -> 639348 (-6.57%); split: -7.17%, +0.60%
InvThroughput: 122877 -> 104382 (-15.05%); split: -15.18%, +0.13%
VClause: 2601 -> 2323 (-10.69%); split: -10.77%, +0.08%
SClause: 5629 -> 5443 (-3.30%); split: -3.91%, +0.60%
Copies: 9393 -> 8720 (-7.16%); split: -8.22%, +1.05%
PreSGPRs: 14623 -> 13666 (-6.54%); split: -6.76%, +0.22%
PreVGPRs: 9847 -> 8503 (-13.65%)

fossils-db (Polaris10):
Totals from 565 (0.42% of 135960) affected shaders:
SGPRs: 28064 -> 27104 (-3.42%)
VGPRs: 12516 -> 10544 (-15.76%); split: -15.79%, +0.03%
CodeSize: 516920 -> 456536 (-11.68%); split: -11.68%, +0.00%
MaxWaves: 4369 -> 4418 (+1.12%)
Instrs: 97771 -> 85903 (-12.14%); split: -12.14%, +0.00%
Latency: 767482 -> 708545 (-7.68%); split: -7.97%, +0.29%
InvThroughput: 280017 -> 235744 (-15.81%)
VClause: 2270 -> 2090 (-7.93%); split: -8.50%, +0.57%
SClause: 5185 -> 5012 (-3.34%); split: -3.86%, +0.52%
Copies: 8328 -> 7555 (-9.28%); split: -9.35%, +0.07%
Branches: 1143 -> 1113 (-2.62%)
PreSGPRs: 13816 -> 12725 (-7.90%); split: -7.92%, +0.02%
PreVGPRs: 9707 -> 8270 (-14.80%)

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15108>
This commit is contained in:
Samuel Pitoiset 2022-02-21 15:47:46 +01:00
parent f800af2231
commit 1e010348ee
2 changed files with 10 additions and 5 deletions

View File

@ -2613,15 +2613,16 @@ radv_link_shaders(struct radv_pipeline *pipeline,
continue;
unsigned col_format = (pipeline_key->ps.col_format >> (4 * idx)) & 0xf;
switch (col_format) {
case V_028714_SPI_SHADER_ZERO:
unsigned cb_target_mask = (pipeline_key->ps.cb_target_mask >> (4 * idx)) & 0xf;
if (col_format == V_028714_SPI_SHADER_ZERO ||
(col_format == V_028714_SPI_SHADER_32_R && !cb_target_mask &&
!pipeline_key->ps.mrt0_is_dual_src)) {
/* Remove the color export if it's unused or in presence of holes. */
info->outputs_written &= ~BITFIELD64_BIT(var->data.location);
var->data.location = 0;
var->data.mode = nir_var_shader_temp;
fixup_derefs = true;
break;
default:
break;
}
}
if (fixup_derefs) {
@ -2973,6 +2974,8 @@ radv_generate_graphics_pipeline_key(const struct radv_pipeline *pipeline,
}
key.ps.col_format = blend->spi_shader_col_format;
key.ps.cb_target_mask = blend->cb_target_mask;
key.ps.mrt0_is_dual_src = blend->mrt0_is_dual_src;
if (pipeline->device->physical_device->rad_info.chip_class < GFX8) {
key.ps.is_int8 = blend->col_format_is_int8;
key.ps.is_int10 = blend->col_format_is_int10;

View File

@ -84,8 +84,10 @@ struct radv_pipeline_key {
uint32_t col_format;
uint32_t is_int8;
uint32_t is_int10;
uint32_t cb_target_mask;
uint8_t log2_ps_iter_samples;
uint8_t num_samples;
bool mrt0_is_dual_src;
bool lower_discard_to_demote;
bool enable_mrt_output_nan_fixup;