ac/nir/ngg: Split 16-bit MS output stores by components.

This is to prevent overwriting the high 16 bits.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28704>
This commit is contained in:
Timur Kristóf 2024-04-15 18:21:44 +02:00
parent c0c04d244c
commit 07cedd6486
1 changed file with 12 additions and 28 deletions

View File

@ -3769,32 +3769,6 @@ update_ms_output_info(const nir_io_semantics io_sem,
}
}
/**
 * Pad the components of a small-bitsize output store value to 32 bits.
 *
 * Background (Vulkan spec 15.1.4-15.1.5): a shader interface output slot
 * holds 4x 32-bit components. A component narrower than 32 bits still
 * occupies a full 32-bit component, while a 64-bit one occupies two.
 * Since components of different bit sizes may share one slot, small
 * components are not packed tightly; for simplicity each one is converted
 * to 32 bits so that the result can be stored contiguously.
 *
 * \param b          NIR builder used to emit the conversion instructions.
 * \param store_val  Value about to be stored; at most 4 components when its
 *                   bit size is below 32 (asserted).
 * \return store_val itself when its bit size is 32 or more; otherwise a new
 *         vector with the same number of components, each one converted
 *         with nir_u2u32.
 */
static nir_def *
regroup_store_val(nir_builder *b, nir_def *store_val)
{
   /* Values that are already 32 bits or wider are returned as they are. */
   if (store_val->bit_size >= 32)
      return store_val;

   const unsigned num_comps = store_val->num_components;
   assert(num_comps <= 4);

   /* Convert every channel separately, then rebuild the vector. */
   nir_def *widened[4] = {0};
   for (unsigned i = 0; i < num_comps; i++) {
      nir_def *channel = nir_channel(b, store_val, i);
      widened[i] = nir_u2u32(b, channel);
   }

   return nir_vec(b, widened, num_comps);
}
static const ms_out_part *
ms_get_out_layout_part(unsigned location,
shader_info *info,
@ -3924,11 +3898,21 @@ ms_store_arrayed_output_intrin(nir_builder *b,
unsigned component_offset = nir_intrinsic_component(intrin);
unsigned write_mask = nir_intrinsic_write_mask(intrin);
nir_def *store_val = regroup_store_val(b, intrin->src[0].ssa);
nir_def *store_val = intrin->src[0].ssa;
nir_def *arr_index = nir_get_io_arrayed_index_src(intrin)->ssa;
nir_src *base_off_src = nir_get_io_offset_src(intrin);
ms_store_arrayed_output(b, base_off_src, store_val, arr_index, io_sem, component_offset, write_mask, s);
if (store_val->bit_size < 32) {
/* Split 16-bit output stores to ensure each 16-bit component is stored
* in the correct location, without overwriting the other 16 bits there.
*/
u_foreach_bit(c, write_mask) {
nir_def *store_component = nir_channel(b, store_val, c);
ms_store_arrayed_output(b, base_off_src, store_component, arr_index, io_sem, c + component_offset, 1, s);
}
} else {
ms_store_arrayed_output(b, base_off_src, store_val, arr_index, io_sem, component_offset, write_mask, s);
}
}
static nir_def *