intel/fs: Take into account region strides during SIMD lowering decision of SHUFFLE.
This fixes a bug in the handcrafted SIMD lowering done by the SHUFFLE
code generation, which wasn't taking into account the source and
destination region strides while deciding whether it needs to split an
instruction.
v2: Use new element_sz() helper instead of left shift. (Lionel)
Fixes: 90c9f29518 ("i965/fs: Add support for nir_intrinsic_shuffle")
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14273>
This commit is contained in:
parent
44e48751d2
commit
d1038197f3
|
@ -616,8 +616,8 @@ fs_generator::generate_shuffle(fs_inst *inst,
|
|||
* easier just to split it here.
|
||||
*/
|
||||
const unsigned lower_width =
|
||||
(devinfo->ver <= 7 || type_sz(src.type) > 4) ?
|
||||
8 : MIN2(16, inst->exec_size);
|
||||
devinfo->ver <= 7 || element_sz(src) > 4 || element_sz(dst) > 4 ? 8 :
|
||||
MIN2(16, inst->exec_size);
|
||||
|
||||
brw_set_default_exec_size(p, cvt(lower_width) - 1);
|
||||
for (unsigned group = 0; group < inst->exec_size; group += lower_width) {
|
||||
|
|
|
@ -1238,6 +1238,28 @@ region_matches(struct brw_reg reg, enum brw_vertical_stride v,
|
|||
region_matches(reg, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, \
|
||||
BRW_HORIZONTAL_STRIDE_0)
|
||||
|
||||
/**
|
||||
* Return the size in bytes per data element of register \p reg on the
|
||||
* corresponding register file.
|
||||
*/
|
||||
static inline unsigned
|
||||
element_sz(struct brw_reg reg)
|
||||
{
|
||||
if (reg.file == BRW_IMMEDIATE_VALUE || has_scalar_region(reg)) {
|
||||
return type_sz(reg.type);
|
||||
|
||||
} else if (reg.width == BRW_WIDTH_1 &&
|
||||
reg.hstride == BRW_HORIZONTAL_STRIDE_0) {
|
||||
assert(reg.vstride != BRW_VERTICAL_STRIDE_0);
|
||||
return type_sz(reg.type) << (reg.vstride - 1);
|
||||
|
||||
} else {
|
||||
assert(reg.hstride != BRW_HORIZONTAL_STRIDE_0);
|
||||
assert(reg.vstride == reg.hstride + reg.width);
|
||||
return type_sz(reg.type) << (reg.hstride - 1);
|
||||
}
|
||||
}
|
||||
|
||||
/* brw_packed_float.c */
|
||||
int brw_float_to_vf(float f);
|
||||
float brw_vf_to_float(unsigned char vf);
|
||||
|
|
Loading…
Reference in New Issue