intel/fs/copy_prop: check stride constraints with actual final type
In some cases we will change the type of the destination register of an instruction. This is the type we should use to verify that we're allow to do the replacement. Otherwise we can hit restrictions on CHV and upcoming Xe-Hp for instance where the copy propagation transforms this : send(16) (mlen: 2) vgrf10:UD, 0u, 0u, vgrf35:D, null:UD mov(16) vgrf11:UW, vgrf10<2>:UW mov(16) vgrf12:UW, vgrf10+0.2<2>:UW mov(16) vgrf15:HF, |vgrf11|:HF mov(16) vgrf16:HF, |vgrf12|:HF mov(8) vgrf41<2>:UW, vgrf15+0.0:UW group0 mov(8) vgrf42<2>:UW, vgrf15+0.16:UW group8 mov(8) vgrf45<2>:UW, vgrf16+0.0:UW group0 mov(8) vgrf46<2>:UW, vgrf16+0.16:UW group8 into this : send(16) (mlen: 2) vgrf10:UD, 0u, 0u, vgrf35:D, null:UD mov(8) vgrf41<2>:HF, |vgrf10+0.0|<2>:HF group0 mov(8) vgrf42<2>:HF, |vgrf10+1.0|<2>:HF group8 mov(8) vgrf45<2>:HF, |vgrf10+0.2|<2>:HF group0 mov(8) vgrf46<2>:HF, |vgrf10+1.2|<2>:HF group8 Because of the floating point use, stride and offets should be the same. v2: Fix final destination type selection (Curro) v3: constify (Curro) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: <mesa-stable@lists.freedesktop.org> Reviewed-by: Francisco Jerez <currojerez@riseup.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9832>
This commit is contained in:
parent
0b35987895
commit
aa53665fda
|
@ -367,7 +367,8 @@ is_logic_op(enum opcode opcode)
|
|||
}
|
||||
|
||||
static bool
|
||||
can_take_stride(fs_inst *inst, unsigned arg, unsigned stride,
|
||||
can_take_stride(fs_inst *inst, brw_reg_type dst_type,
|
||||
unsigned arg, unsigned stride,
|
||||
const gen_device_info *devinfo)
|
||||
{
|
||||
if (stride > 4)
|
||||
|
@ -377,9 +378,9 @@ can_take_stride(fs_inst *inst, unsigned arg, unsigned stride,
|
|||
* of the corresponding channel of the destination, and the provided stride
|
||||
* would break this restriction.
|
||||
*/
|
||||
if (has_dst_aligned_region_restriction(devinfo, inst) &&
|
||||
if (has_dst_aligned_region_restriction(devinfo, inst, dst_type) &&
|
||||
!(type_sz(inst->src[arg].type) * stride ==
|
||||
type_sz(inst->dst.type) * inst->dst.stride ||
|
||||
type_sz(dst_type) * inst->dst.stride ||
|
||||
stride == 0))
|
||||
return false;
|
||||
|
||||
|
@ -534,10 +535,15 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
|
|||
if (instruction_requires_packed_data(inst) && entry_stride != 1)
|
||||
return false;
|
||||
|
||||
const brw_reg_type dst_type = (has_source_modifiers &&
|
||||
entry->dst.type != inst->src[arg].type) ?
|
||||
entry->dst.type : inst->dst.type;
|
||||
|
||||
/* Bail if the result of composing both strides would exceed the
|
||||
* hardware limit.
|
||||
*/
|
||||
if (!can_take_stride(inst, arg, entry_stride * inst->src[arg].stride,
|
||||
if (!can_take_stride(inst, dst_type, arg,
|
||||
entry_stride * inst->src[arg].stride,
|
||||
devinfo))
|
||||
return false;
|
||||
|
||||
|
|
|
@ -553,7 +553,8 @@ is_unordered(const fs_inst *inst)
|
|||
*/
|
||||
static inline bool
|
||||
has_dst_aligned_region_restriction(const gen_device_info *devinfo,
|
||||
const fs_inst *inst)
|
||||
const fs_inst *inst,
|
||||
brw_reg_type dst_type)
|
||||
{
|
||||
const brw_reg_type exec_type = get_exec_type(inst);
|
||||
/* Even though the hardware spec claims that "integer DWord multiply"
|
||||
|
@ -567,13 +568,20 @@ has_dst_aligned_region_restriction(const gen_device_info *devinfo,
|
|||
(inst->opcode == BRW_OPCODE_MAD &&
|
||||
MIN2(type_sz(inst->src[1].type), type_sz(inst->src[2].type)) >= 4));
|
||||
|
||||
if (type_sz(inst->dst.type) > 4 || type_sz(exec_type) > 4 ||
|
||||
if (type_sz(dst_type) > 4 || type_sz(exec_type) > 4 ||
|
||||
(type_sz(exec_type) == 4 && is_dword_multiply))
|
||||
return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
has_dst_aligned_region_restriction(const gen_device_info *devinfo,
|
||||
const fs_inst *inst)
|
||||
{
|
||||
return has_dst_aligned_region_restriction(devinfo, inst, inst->dst.type);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return whether the LOAD_PAYLOAD instruction is a plain copy of bits from
|
||||
* the specified register file into a VGRF.
|
||||
|
|
Loading…
Reference in New Issue