intel/fs: Handle regioning restrictions of split FP/DP pipelines.

The floating-point and double-precision FPU pipelines of XeHP
platforms don't support arbitrary regioning modes: corresponding
channels of sources and destination are required to be aligned to the
same sub-register offset, similar to the restriction FP64 instructions
had on CHV/BXT platforms.

Most violations of this restriction can be fixed easily by teaching
has_dst_aligned_region_restriction() about the change so the regioning
lowering pass gets rid of any unsupported regioning.  For cases where
this is not sufficient (e.g. because a virtual instruction internally
uses some regioning mode not supported by the floating-point pipeline)
the regioning lowering pass is extended with an additional
lower_exec_type() codepath that bit-casts sources and destination to
an integer type whenever the execution type is not supported by the
instruction.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10000>
This commit is contained in:
Francisco Jerez 2021-04-06 13:04:26 -07:00 committed by Marge Bot
parent 0dc16965a9
commit f3e5cd813a
2 changed files with 83 additions and 17 deletions

View File

@ -185,6 +185,34 @@ namespace {
}
}
/**
 * Report whether the instruction uses an execution type it cannot handle.
 *
 * The result is a bitmask rather than a plain boolean: zero means the
 * execution type is fine, while a non-zero value selects the operands that
 * have to be bit-cast to an integer type of matching size (callers test bit
 * i of the mask for the i-th source).  Any source or destination modifiers
 * on the affected operands are expected to be lowered into separate MOV
 * instructions.  Only bit 0 is ever reported by the opcodes handled here.
 */
unsigned
has_invalid_exec_type(const gen_device_info *devinfo, const fs_inst *inst)
{
   switch (inst->opcode) {
   case SHADER_OPCODE_QUAD_SWIZZLE:
      /* Needs the bit-cast exactly when the destination-aligned region
       * restriction applies to this instruction.
       */
      if (has_dst_aligned_region_restriction(devinfo, inst))
         return 0x1;

      return 0;

   case SHADER_OPCODE_BROADCAST:
   case SHADER_OPCODE_MOV_INDIRECT:
      /* Source types wider than 4 bytes are rejected on ver 7 (other than
       * Haswell), Cherryview, 9lp and verx10 >= 125 devices.
       */
      if (((devinfo->ver == 7 && !devinfo->is_haswell) ||
           devinfo->is_cherryview || gen_device_info_is_9lp(devinfo) ||
           devinfo->verx10 >= 125) &&
          type_sz(inst->src[0].type) > 4)
         return 0x1;

      /* On verx10 >= 125 any floating-point source type is rejected too,
       * regardless of its size.
       */
      if (devinfo->verx10 >= 125 &&
          brw_reg_type_is_floating_point(inst->src[0].type))
         return 0x1;

      return 0;

   default:
      return 0;
   }
}
/*
* Return whether the instruction has unsupported source modifiers
* specified for the i-th source region.
@ -193,8 +221,11 @@ namespace {
has_invalid_src_modifiers(const gen_device_info *devinfo, const fs_inst *inst,
unsigned i)
{
return !inst->can_do_source_mods(devinfo) &&
(inst->src[i].negate || inst->src[i].abs);
return (!inst->can_do_source_mods(devinfo) &&
(inst->src[i].negate || inst->src[i].abs)) ||
((has_invalid_exec_type(devinfo, inst) & (1u << i)) &&
(inst->src[i].negate || inst->src[i].abs ||
inst->src[i].type != get_exec_type(inst)));
}
/*
@ -209,24 +240,27 @@ namespace {
return false;
case BRW_OPCODE_SEL:
return inst->dst.type != get_exec_type(inst);
case SHADER_OPCODE_BROADCAST:
case SHADER_OPCODE_MOV_INDIRECT:
/* The source and destination types of these may be hard-coded to
* integer at codegen time due to hardware limitations of 64-bit
* types.
*/
return ((devinfo->ver == 7 && !devinfo->is_haswell) ||
devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) &&
type_sz(inst->src[0].type) > 4 &&
inst->dst.type != inst->src[0].type;
default:
/* FIXME: We assume the opcodes don't explicitly mentioned before
* just work fine with arbitrary conversions.
/* FIXME: We assume the opcodes not explicitly mentioned before just
* work fine with arbitrary conversions, unless they need to be
* bit-cast.
*/
return false;
return has_invalid_exec_type(devinfo, inst) &&
inst->dst.type != get_exec_type(inst);
}
}
/**
 * Tell whether the destination of the instruction carries modifiers that
 * are not supported and therefore need to be lowered.
 *
 * That is the case when the execution type is invalid and the instruction
 * saturates or has a conditional mod, or when the instruction performs an
 * invalid conversion.
 */
bool
has_invalid_dst_modifiers(const gen_device_info *devinfo, const fs_inst *inst)
{
   /* Saturate and conditional mod cannot be kept on an instruction whose
    * execution type is reported as invalid.
    */
   if (has_invalid_exec_type(devinfo, inst) &&
       (inst->saturate || inst->conditional_mod))
      return true;

   return has_invalid_conversion(devinfo, inst);
}
/**
* Return whether the instruction has non-standard semantics for the
* conditional mod which don't cause the flag register to be updated with
@ -423,6 +457,30 @@ namespace {
return true;
}
/**
 * Retype the destination and the flagged sources of the instruction to an
 * integer type of the same size, for use when the instruction does not
 * support its current execution type.
 *
 * The sources to retype are those selected by the mask returned from
 * has_invalid_exec_type().  Always returns true.
 */
bool
lower_exec_type(fs_visitor *v, bblock_t *block, fs_inst *inst)
{
   /* The destination type must already agree with the execution type. */
   assert(inst->dst.type == get_exec_type(inst));

   const unsigned cast_mask = has_invalid_exec_type(v->devinfo, inst);
   /* NOTE(review): the 'false' argument presumably requests the unsigned
    * integer variant -- confirm against brw_int_type().
    */
   const brw_reg_type int_type =
      brw_int_type(type_sz(inst->dst.type), false);

   for (unsigned s = 0; s < inst->sources; s++) {
      /* Sources not selected by the mask keep their type. */
      if (!(cast_mask & (1u << s)))
         continue;

      assert(inst->src[s].type == inst->dst.type);
      inst->src[s].type = int_type;
   }

   inst->dst.type = int_type;

   return true;
}
/**
* Legalize the source and destination regioning controls of the specified
* instruction.
@ -433,7 +491,7 @@ namespace {
const gen_device_info *devinfo = v->devinfo;
bool progress = false;
if (has_invalid_conversion(devinfo, inst))
if (has_invalid_dst_modifiers(devinfo, inst))
progress |= lower_dst_modifiers(v, block, inst);
if (has_invalid_dst_region(devinfo, inst))
@ -447,6 +505,9 @@ namespace {
progress |= lower_src_region(v, block, inst, i);
}
if (has_invalid_exec_type(devinfo, inst))
progress |= lower_exec_type(v, block, inst);
return progress;
}
}

View File

@ -570,7 +570,12 @@ has_dst_aligned_region_restriction(const gen_device_info *devinfo,
if (type_sz(dst_type) > 4 || type_sz(exec_type) > 4 ||
(type_sz(exec_type) == 4 && is_dword_multiply))
return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo);
return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo) ||
devinfo->verx10 >= 125;
else if (brw_reg_type_is_floating_point(inst->dst.type))
return devinfo->verx10 >= 125;
else
return false;
}