ac/nir: change es output lower param to esgs_itemsize

radeonsi may add an extra dword to the ESGS item stride, so pass the
stride in bytes directly instead of deriving it from the number of outputs.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16788>
This commit is contained in:
Qiang Yu 2022-05-31 14:18:17 +08:00
parent 8b5e8b2af7
commit 109eb378e5
3 changed files with 10 additions and 11 deletions

View File

@ -102,7 +102,7 @@ void
ac_nir_lower_es_outputs_to_mem(nir_shader *shader,
ac_nir_map_io_driver_location map,
enum amd_gfx_level gfx_level,
unsigned num_reserved_es_outputs);
unsigned esgs_itemsize);
void
ac_nir_lower_gs_inputs_to_mem(nir_shader *shader,

View File

@ -47,10 +47,8 @@ typedef struct {
/* I/O semantic -> real location used by lowering. */
ac_nir_map_io_driver_location map_io;
/* Number of ES outputs for which memory should be reserved.
* When compacted, this should be the number of linked ES outputs.
*/
unsigned num_reserved_es_outputs;
/* Stride of one ES invocation's outputs in the ESGS ring, in bytes. */
unsigned esgs_itemsize;
} lower_esgs_io_state;
static nir_ssa_def *
@ -171,9 +169,8 @@ lower_es_output_store(nir_builder *b,
write_mask, true, true);
} else {
/* GFX9+: ES is merged into GS, data is passed through LDS. */
unsigned esgs_itemsize = st->num_reserved_es_outputs * 16u;
nir_ssa_def *vertex_idx = nir_build_load_local_invocation_index(b);
nir_ssa_def *off = nir_iadd(b, nir_imul_imm(b, vertex_idx, esgs_itemsize), io_off);
nir_ssa_def *off = nir_iadd(b, nir_imul_imm(b, vertex_idx, st->esgs_itemsize), io_off);
nir_build_store_shared(b, intrin->src[0].ssa, off, .write_mask = write_mask,
.align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u);
}
@ -267,11 +264,11 @@ void
ac_nir_lower_es_outputs_to_mem(nir_shader *shader,
ac_nir_map_io_driver_location map,
enum amd_gfx_level gfx_level,
unsigned num_reserved_es_outputs)
unsigned esgs_itemsize)
{
lower_esgs_io_state state = {
.gfx_level = gfx_level,
.num_reserved_es_outputs = num_reserved_es_outputs,
.esgs_itemsize = esgs_itemsize,
.map_io = map,
};

View File

@ -1115,7 +1115,8 @@ radv_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *sta
return true;
} else if (info->vs.as_es) {
NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, NULL,
device->physical_device->rad_info.gfx_level, info->vs.num_linked_outputs);
device->physical_device->rad_info.gfx_level,
info->vs.num_linked_outputs * 16u);
return true;
}
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
@ -1133,7 +1134,8 @@ radv_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *sta
if (info->tes.as_es) {
NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, NULL,
device->physical_device->rad_info.gfx_level, info->tes.num_linked_outputs);
device->physical_device->rad_info.gfx_level,
info->tes.num_linked_outputs * 16u);
}
return true;