nir: fix lower_memcpy
A memcpy is divided into chunks that are at most vec4-sized (16B). The
problem shows up with a 24-byte structure:
struct {
float3 a;
float3 b;
}
If you memcpy that struct, the lowering will emit two load/store pairs, one
of size 8, the next one of size 16. But both end up located at offset 0, so
we effectively drop 2 floats.
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: a3177cca99
("nir: Add a lowering pass to lower memcpy")
Reviewed-by: Jason Ekstrand <jason.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15049>
This commit is contained in:
parent
bc63802596
commit
768930a73a
|
@ -111,11 +111,14 @@ lower_memcpy_impl(nir_function_impl *impl)
|
|||
uint64_t size = nir_src_as_uint(cpy->src[2]);
|
||||
uint64_t offset = 0;
|
||||
while (offset < size) {
|
||||
uint64_t remaining = offset - size;
|
||||
/* For our chunk size, we choose the largest power-of-two that
|
||||
* divides size with a maximum of 16B (a vec4).
|
||||
uint64_t remaining = size - offset;
|
||||
/* Find the largest chunk size power-of-two (MSB in remaining)
|
||||
* and limit our chunk to 16B (a vec4). It's important to do as
|
||||
* many 16B chunks as possible first so that the index
|
||||
* computation is correct for
|
||||
* memcpy_(load|store)_deref_elem_imm.
|
||||
*/
|
||||
unsigned copy_size = 1u << MIN2(ffsll(remaining) - 1, 4);
|
||||
unsigned copy_size = 1u << MIN2(util_last_bit64(remaining) - 1, 4);
|
||||
const struct glsl_type *copy_type =
|
||||
copy_type_for_byte_size(copy_size);
|
||||
|
||||
|
|
Loading…
Reference in New Issue