intel/fs: Handle regioning restrictions of split FP/DP pipelines.
The floating-point and double-precision FPU pipelines of XeHP platforms don't support arbitrary regioning modes, corresponding channels of sources and destination are required to be aligned to the same sub-register offset, similar to the restriction FP64 instructions had on CHV/BXT platforms. Most violations of this restriction can be fixed easily by teaching has_dst_aligned_region_restriction() about the change so the regioning lowering pass gets rid of any unsupported regioning. For cases where this is not sufficient (e.g. because a virtual instruction internally uses some regioning mode not supported by the floating-point pipeline) the regioning lowering pass is extended with an additional lower_exec_type() codepath that bit-casts sources and destination to an integer type whenever the execution type is not supported by the instruction. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10000>
This commit is contained in:
parent
0dc16965a9
commit
f3e5cd813a
|
@ -185,6 +185,34 @@ namespace {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a non-zero value if the execution type of the instruction is
|
||||
* unsupported. The destination and sources matching the returned mask
|
||||
* will be bit-cast to an integer type of appropriate size, lowering any
|
||||
* source or destination modifiers into separate MOV instructions.
|
||||
*/
|
||||
unsigned
|
||||
has_invalid_exec_type(const gen_device_info *devinfo, const fs_inst *inst)
|
||||
{
|
||||
switch (inst->opcode) {
|
||||
case SHADER_OPCODE_QUAD_SWIZZLE:
|
||||
return has_dst_aligned_region_restriction(devinfo, inst) ?
|
||||
0x1 : 0;
|
||||
|
||||
case SHADER_OPCODE_BROADCAST:
|
||||
case SHADER_OPCODE_MOV_INDIRECT:
|
||||
return (((devinfo->ver == 7 && !devinfo->is_haswell) ||
|
||||
devinfo->is_cherryview || gen_device_info_is_9lp(devinfo) ||
|
||||
devinfo->verx10 >= 125) && type_sz(inst->src[0].type) > 4) ||
|
||||
(devinfo->verx10 >= 125 &&
|
||||
brw_reg_type_is_floating_point(inst->src[0].type)) ?
|
||||
0x1 : 0;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Return whether the instruction has unsupported source modifiers
|
||||
* specified for the i-th source region.
|
||||
|
@ -193,8 +221,11 @@ namespace {
|
|||
has_invalid_src_modifiers(const gen_device_info *devinfo, const fs_inst *inst,
|
||||
unsigned i)
|
||||
{
|
||||
return !inst->can_do_source_mods(devinfo) &&
|
||||
(inst->src[i].negate || inst->src[i].abs);
|
||||
return (!inst->can_do_source_mods(devinfo) &&
|
||||
(inst->src[i].negate || inst->src[i].abs)) ||
|
||||
((has_invalid_exec_type(devinfo, inst) & (1u << i)) &&
|
||||
(inst->src[i].negate || inst->src[i].abs ||
|
||||
inst->src[i].type != get_exec_type(inst)));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -209,24 +240,27 @@ namespace {
|
|||
return false;
|
||||
case BRW_OPCODE_SEL:
|
||||
return inst->dst.type != get_exec_type(inst);
|
||||
case SHADER_OPCODE_BROADCAST:
|
||||
case SHADER_OPCODE_MOV_INDIRECT:
|
||||
/* The source and destination types of these may be hard-coded to
|
||||
* integer at codegen time due to hardware limitations of 64-bit
|
||||
* types.
|
||||
*/
|
||||
return ((devinfo->ver == 7 && !devinfo->is_haswell) ||
|
||||
devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) &&
|
||||
type_sz(inst->src[0].type) > 4 &&
|
||||
inst->dst.type != inst->src[0].type;
|
||||
default:
|
||||
/* FIXME: We assume the opcodes don't explicitly mentioned before
|
||||
* just work fine with arbitrary conversions.
|
||||
/* FIXME: We assume the opcodes not explicitly mentioned before just
|
||||
* work fine with arbitrary conversions, unless they need to be
|
||||
* bit-cast.
|
||||
*/
|
||||
return false;
|
||||
return has_invalid_exec_type(devinfo, inst) &&
|
||||
inst->dst.type != get_exec_type(inst);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return whether the instruction has unsupported destination modifiers.
|
||||
*/
|
||||
bool
|
||||
has_invalid_dst_modifiers(const gen_device_info *devinfo, const fs_inst *inst)
|
||||
{
|
||||
return (has_invalid_exec_type(devinfo, inst) &&
|
||||
(inst->saturate || inst->conditional_mod)) ||
|
||||
has_invalid_conversion(devinfo, inst);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return whether the instruction has non-standard semantics for the
|
||||
* conditional mod which don't cause the flag register to be updated with
|
||||
|
@ -423,6 +457,30 @@ namespace {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Bit-cast sources and destination of the instruction to an appropriate
|
||||
* integer type, to be used in cases where the instruction doesn't support
|
||||
* some other execution type.
|
||||
*/
|
||||
bool
|
||||
lower_exec_type(fs_visitor *v, bblock_t *block, fs_inst *inst)
|
||||
{
|
||||
assert(inst->dst.type == get_exec_type(inst));
|
||||
const unsigned mask = has_invalid_exec_type(v->devinfo, inst);
|
||||
const brw_reg_type raw_type = brw_int_type(type_sz(inst->dst.type), false);
|
||||
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
if (mask & (1u << i)) {
|
||||
assert(inst->src[i].type == inst->dst.type);
|
||||
inst->src[i].type = raw_type;
|
||||
}
|
||||
}
|
||||
|
||||
inst->dst.type = raw_type;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Legalize the source and destination regioning controls of the specified
|
||||
* instruction.
|
||||
|
@ -433,7 +491,7 @@ namespace {
|
|||
const gen_device_info *devinfo = v->devinfo;
|
||||
bool progress = false;
|
||||
|
||||
if (has_invalid_conversion(devinfo, inst))
|
||||
if (has_invalid_dst_modifiers(devinfo, inst))
|
||||
progress |= lower_dst_modifiers(v, block, inst);
|
||||
|
||||
if (has_invalid_dst_region(devinfo, inst))
|
||||
|
@ -447,6 +505,9 @@ namespace {
|
|||
progress |= lower_src_region(v, block, inst, i);
|
||||
}
|
||||
|
||||
if (has_invalid_exec_type(devinfo, inst))
|
||||
progress |= lower_exec_type(v, block, inst);
|
||||
|
||||
return progress;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -570,7 +570,12 @@ has_dst_aligned_region_restriction(const gen_device_info *devinfo,
|
|||
|
||||
if (type_sz(dst_type) > 4 || type_sz(exec_type) > 4 ||
|
||||
(type_sz(exec_type) == 4 && is_dword_multiply))
|
||||
return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo);
|
||||
return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo) ||
|
||||
devinfo->verx10 >= 125;
|
||||
|
||||
else if (brw_reg_type_is_floating_point(inst->dst.type))
|
||||
return devinfo->verx10 >= 125;
|
||||
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue