intel/fs: Take into account region strides during SIMD lowering decision of SHUFFLE.

This fixes a bug in the handcrafted SIMD lowering done by the SHUFFLE
code generation, which wasn't taking into account the source and
destination region strides while deciding whether it needs to split an
instruction.

v2: Use new element_sz() helper instead of left shift. (Lionel)

Fixes: 90c9f29518 ("i965/fs: Add support for nir_intrinsic_shuffle")
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14273>
This commit is contained in:
Francisco Jerez 2021-12-20 01:54:57 -08:00 committed by Marge Bot
parent 44e48751d2
commit d1038197f3
2 changed files with 24 additions and 2 deletions

View File

@ -616,8 +616,8 @@ fs_generator::generate_shuffle(fs_inst *inst,
* easier just to split it here.
*/
const unsigned lower_width =
(devinfo->ver <= 7 || type_sz(src.type) > 4) ?
8 : MIN2(16, inst->exec_size);
devinfo->ver <= 7 || element_sz(src) > 4 || element_sz(dst) > 4 ? 8 :
MIN2(16, inst->exec_size);
brw_set_default_exec_size(p, cvt(lower_width) - 1);
for (unsigned group = 0; group < inst->exec_size; group += lower_width) {

View File

@ -1238,6 +1238,28 @@ region_matches(struct brw_reg reg, enum brw_vertical_stride v,
region_matches(reg, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, \
BRW_HORIZONTAL_STRIDE_0)
/**
 * Compute how many bytes separate consecutive data elements of register
 * \p reg, i.e. the type size scaled by the stride of its region.
 *
 * Three cases are distinguished:
 *  - immediates and scalar regions: the plain size of the type is returned;
 *  - regions with a width of one and a zero horizontal stride: the type size
 *    is scaled according to the vertical stride field;
 *  - every other region: the type size is scaled according to the horizontal
 *    stride field, and the vertical stride is asserted to be consistent with
 *    the horizontal stride and width.
 *
 * NOTE(review): the "<< (stride - 1)" scaling presumes the stride fields hold
 * a log2-style hardware encoding where the zero value means "stride 0" --
 * confirm against the register region definitions.
 */
static inline unsigned
element_sz(struct brw_reg reg)
{
   /* No striding to account for: one element is exactly one value of the
    * register type.
    */
   if (reg.file == BRW_IMMEDIATE_VALUE || has_scalar_region(reg))
      return type_sz(reg.type);

   /* One element per row: the distance between elements is controlled by
    * the vertical stride alone, which therefore must not be zero here.
    */
   if (reg.hstride == BRW_HORIZONTAL_STRIDE_0 && reg.width == BRW_WIDTH_1) {
      assert(reg.vstride != BRW_VERTICAL_STRIDE_0);
      return type_sz(reg.type) << (reg.vstride - 1);
   }

   /* General case: elements are spaced by the horizontal stride.  The
    * vertical stride field is required to equal the sum of the horizontal
    * stride and width fields so that a single per-element size is
    * well-defined.
    */
   assert(reg.hstride != BRW_HORIZONTAL_STRIDE_0);
   assert(reg.vstride == reg.hstride + reg.width);
   return type_sz(reg.type) << (reg.hstride - 1);
}
/* brw_packed_float.c */
int brw_float_to_vf(float f);
float brw_vf_to_float(unsigned char vf);