mirror of https://gitlab.freedesktop.org/mesa/mesa
intel/brw: Drop align16 support in brw_broadcast()
align16 support is only used on Gen9 for 3-source instructions, quad swizzling, and dPdy calculations. We don't need it for broadcast, so remove the align16 paths from brw_broadcast() and assert that the default access mode is align1.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28458>
This commit is contained in:
parent
a520c976a5
commit
9e0d0190ea
|
@ -1957,12 +1957,11 @@ brw_broadcast(struct brw_codegen *p,
|
|||
struct brw_reg idx)
|
||||
{
|
||||
const struct intel_device_info *devinfo = p->devinfo;
|
||||
const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
|
||||
brw_inst *inst;
|
||||
assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
|
||||
|
||||
brw_push_insn_state(p);
|
||||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
brw_set_default_exec_size(p, align1 ? BRW_EXECUTE_1 : BRW_EXECUTE_4);
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
||||
|
||||
assert(src.file == BRW_GENERAL_REGISTER_FILE &&
|
||||
src.address_mode == BRW_ADDRESS_DIRECT);
|
||||
|
@ -1980,15 +1979,14 @@ brw_broadcast(struct brw_codegen *p,
|
|||
src.type = dst.type = brw_reg_type_from_bit_size(type_sz(src.type) * 8,
|
||||
BRW_REGISTER_TYPE_UD);
|
||||
|
||||
if ((src.vstride == 0 && (src.hstride == 0 || !align1)) ||
|
||||
if ((src.vstride == 0 && src.hstride == 0) ||
|
||||
idx.file == BRW_IMMEDIATE_VALUE) {
|
||||
/* Trivial, the source is already uniform or the index is a constant.
|
||||
* We will typically not get here if the optimizer is doing its job, but
|
||||
* asserting would be mean.
|
||||
*/
|
||||
const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.ud : 0;
|
||||
src = align1 ? stride(suboffset(src, i), 0, 1, 0) :
|
||||
stride(suboffset(src, 4 * i), 0, 4, 1);
|
||||
src = stride(suboffset(src, i), 0, 1, 0);
|
||||
|
||||
if (type_sz(src.type) > 4 && !devinfo->has_64bit_int) {
|
||||
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
|
||||
|
@ -2014,7 +2012,6 @@ brw_broadcast(struct brw_codegen *p,
|
|||
*/
|
||||
assert(src.subnr == 0);
|
||||
|
||||
if (align1) {
|
||||
const struct brw_reg addr =
|
||||
retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
|
||||
unsigned offset = src.nr * REG_SIZE + src.subnr;
|
||||
|
@ -2074,24 +2071,6 @@ brw_broadcast(struct brw_codegen *p,
|
|||
brw_MOV(p, dst,
|
||||
retype(brw_vec1_indirect(addr.subnr, offset), src.type));
|
||||
}
|
||||
} else {
|
||||
/* In SIMD4x2 mode the index can be either zero or one, replicate it
|
||||
* to all bits of a flag register,
|
||||
*/
|
||||
inst = brw_MOV(p,
|
||||
brw_null_reg(),
|
||||
stride(brw_swizzle(idx, BRW_SWIZZLE_XXXX), 4, 4, 1));
|
||||
brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NONE);
|
||||
brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_NZ);
|
||||
brw_inst_set_flag_reg_nr(devinfo, inst, 1);
|
||||
|
||||
/* and use predicated SEL to pick the right channel. */
|
||||
inst = brw_SEL(p, dst,
|
||||
stride(suboffset(src, 4), 4, 4, 1),
|
||||
stride(src, 4, 4, 1));
|
||||
brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NORMAL);
|
||||
brw_inst_set_flag_reg_nr(devinfo, inst, 1);
|
||||
}
|
||||
}
|
||||
|
||||
brw_pop_insn_state(p);
|
||||
|
|
Loading…
Reference in New Issue