radv: Always use fixed I/O locations for TCS outputs in VRAM.

The goal of this patch is to make the TCS->TES shader I/O
independent of assigned I/O driver locations.

Always using the unlinked approach means a larger stride when
calculating some memory addresses, but it should otherwise have
no performance impact, because it only affects how TCS outputs
are stored to VRAM and does not affect how they are stored in
LDS.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28488>
This commit is contained in:
Timur Kristóf 2024-04-02 00:21:38 +02:00 committed by Marge Bot
parent 892ebf2040
commit 0e481a4adc
2 changed files with 10 additions and 6 deletions

View File

@ -141,12 +141,12 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *s
}
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, map_input, info->vs.tcs_in_out_eq);
NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, map_output, pdev->info.gfx_level, info->tcs.tes_inputs_read,
info->tcs.tes_patch_inputs_read, info->wave_size, false, false);
NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, radv_map_io_driver_location, pdev->info.gfx_level,
info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, info->wave_size, false, false);
return true;
} else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
NIR_PASS_V(nir, ac_nir_lower_tes_inputs_to_mem, map_input);
NIR_PASS_V(nir, ac_nir_lower_tes_inputs_to_mem, radv_map_io_driver_location);
if (info->tes.as_es) {
NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize);

View File

@ -1474,12 +1474,16 @@ radv_link_tcs(const struct radv_device *device, struct radv_shader_stage *tcs_st
/* Copy TCS info into the TES info */
merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
nir_linked_io_var_info tcs2tes = nir_assign_linked_io_var_locations(tcs_stage->nir, tes_stage->nir);
/* Count the number of per-vertex output slots we need to reserve for the TCS and TES. */
const uint64_t nir_mask = tcs_stage->nir->info.outputs_written & tes_stage->nir->info.inputs_read &
~(VARYING_SLOT_TESS_LEVEL_OUTER | VARYING_SLOT_TESS_LEVEL_INNER);
const uint64_t io_mask = radv_gather_unlinked_io_mask(nir_mask);
const unsigned num_reserved_outputs = util_last_bit64(io_mask);
tcs_stage->info.tcs.num_linked_outputs = tcs2tes.num_linked_io_vars;
tcs_stage->info.tcs.num_linked_outputs = num_reserved_outputs;
tcs_stage->info.outputs_linked = true;
tes_stage->info.tes.num_linked_inputs = tcs2tes.num_linked_io_vars;
tes_stage->info.tes.num_linked_inputs = num_reserved_outputs;
tes_stage->info.inputs_linked = true;
}