intel/brw: Drop align16 support in brw_broadcast()

align16 support is only used on Gen9 for 3-source instructions, quad
swizzling, and dPdy calculations.  We don't need it for broadcast.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28458>
This commit is contained in:
Kenneth Graunke 2024-03-27 16:02:18 -07:00 committed by Marge Bot
parent a520c976a5
commit 9e0d0190ea
1 changed file with 56 additions and 77 deletions

View File

@ -1957,12 +1957,11 @@ brw_broadcast(struct brw_codegen *p,
struct brw_reg idx)
{
const struct intel_device_info *devinfo = p->devinfo;
const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
brw_inst *inst;
assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_exec_size(p, align1 ? BRW_EXECUTE_1 : BRW_EXECUTE_4);
brw_set_default_exec_size(p, BRW_EXECUTE_1);
assert(src.file == BRW_GENERAL_REGISTER_FILE &&
src.address_mode == BRW_ADDRESS_DIRECT);
@ -1980,15 +1979,14 @@ brw_broadcast(struct brw_codegen *p,
src.type = dst.type = brw_reg_type_from_bit_size(type_sz(src.type) * 8,
BRW_REGISTER_TYPE_UD);
if ((src.vstride == 0 && (src.hstride == 0 || !align1)) ||
if ((src.vstride == 0 && src.hstride == 0) ||
idx.file == BRW_IMMEDIATE_VALUE) {
/* Trivial, the source is already uniform or the index is a constant.
* We will typically not get here if the optimizer is doing its job, but
* asserting would be mean.
*/
const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.ud : 0;
src = align1 ? stride(suboffset(src, i), 0, 1, 0) :
stride(suboffset(src, 4 * i), 0, 4, 1);
src = stride(suboffset(src, i), 0, 1, 0);
if (type_sz(src.type) > 4 && !devinfo->has_64bit_int) {
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
@ -2014,7 +2012,6 @@ brw_broadcast(struct brw_codegen *p,
*/
assert(src.subnr == 0);
if (align1) {
const struct brw_reg addr =
retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
unsigned offset = src.nr * REG_SIZE + src.subnr;
@ -2074,24 +2071,6 @@ brw_broadcast(struct brw_codegen *p,
brw_MOV(p, dst,
retype(brw_vec1_indirect(addr.subnr, offset), src.type));
}
} else {
/* In SIMD4x2 mode the index can be either zero or one, replicate it
* to all bits of a flag register,
*/
inst = brw_MOV(p,
brw_null_reg(),
stride(brw_swizzle(idx, BRW_SWIZZLE_XXXX), 4, 4, 1));
brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NONE);
brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_NZ);
brw_inst_set_flag_reg_nr(devinfo, inst, 1);
/* and use predicated SEL to pick the right channel. */
inst = brw_SEL(p, dst,
stride(suboffset(src, 4), 4, 4, 1),
stride(src, 4, 4, 1));
brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NORMAL);
brw_inst_set_flag_reg_nr(devinfo, inst, 1);
}
}
brw_pop_insn_state(p);