intel/fs: Handle regioning restrictions of split FP/DP pipelines.
The floating-point and double-precision FPU pipelines of XeHP platforms don't support arbitrary regioning modes, corresponding channels of sources and destination are required to be aligned to the same sub-register offset, similar to the restriction FP64 instructions had on CHV/BXT platforms. Most violations of this restriction can be fixed easily by teaching has_dst_aligned_region_restriction() about the change so the regioning lowering pass gets rid of any unsupported regioning. For cases where this is not sufficient (e.g. because a virtual instruction internally uses some regioning mode not supported by the floating-point pipeline) the regioning lowering pass is extended with an additional lower_exec_type() codepath that bit-casts sources and destination to an integer type whenever the execution type is not supported by the instruction. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10000>
This commit is contained in:
parent
0dc16965a9
commit
f3e5cd813a
|
@ -185,6 +185,34 @@ namespace {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return a non-zero value if the execution type of the instruction is
|
||||||
|
* unsupported. The destination and sources matching the returned mask
|
||||||
|
* will be bit-cast to an integer type of appropriate size, lowering any
|
||||||
|
* source or destination modifiers into separate MOV instructions.
|
||||||
|
*/
|
||||||
|
unsigned
|
||||||
|
has_invalid_exec_type(const gen_device_info *devinfo, const fs_inst *inst)
|
||||||
|
{
|
||||||
|
switch (inst->opcode) {
|
||||||
|
case SHADER_OPCODE_QUAD_SWIZZLE:
|
||||||
|
return has_dst_aligned_region_restriction(devinfo, inst) ?
|
||||||
|
0x1 : 0;
|
||||||
|
|
||||||
|
case SHADER_OPCODE_BROADCAST:
|
||||||
|
case SHADER_OPCODE_MOV_INDIRECT:
|
||||||
|
return (((devinfo->ver == 7 && !devinfo->is_haswell) ||
|
||||||
|
devinfo->is_cherryview || gen_device_info_is_9lp(devinfo) ||
|
||||||
|
devinfo->verx10 >= 125) && type_sz(inst->src[0].type) > 4) ||
|
||||||
|
(devinfo->verx10 >= 125 &&
|
||||||
|
brw_reg_type_is_floating_point(inst->src[0].type)) ?
|
||||||
|
0x1 : 0;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return whether the instruction has unsupported source modifiers
|
* Return whether the instruction has unsupported source modifiers
|
||||||
* specified for the i-th source region.
|
* specified for the i-th source region.
|
||||||
|
@ -193,8 +221,11 @@ namespace {
|
||||||
has_invalid_src_modifiers(const gen_device_info *devinfo, const fs_inst *inst,
|
has_invalid_src_modifiers(const gen_device_info *devinfo, const fs_inst *inst,
|
||||||
unsigned i)
|
unsigned i)
|
||||||
{
|
{
|
||||||
return !inst->can_do_source_mods(devinfo) &&
|
return (!inst->can_do_source_mods(devinfo) &&
|
||||||
(inst->src[i].negate || inst->src[i].abs);
|
(inst->src[i].negate || inst->src[i].abs)) ||
|
||||||
|
((has_invalid_exec_type(devinfo, inst) & (1u << i)) &&
|
||||||
|
(inst->src[i].negate || inst->src[i].abs ||
|
||||||
|
inst->src[i].type != get_exec_type(inst)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -209,24 +240,27 @@ namespace {
|
||||||
return false;
|
return false;
|
||||||
case BRW_OPCODE_SEL:
|
case BRW_OPCODE_SEL:
|
||||||
return inst->dst.type != get_exec_type(inst);
|
return inst->dst.type != get_exec_type(inst);
|
||||||
case SHADER_OPCODE_BROADCAST:
|
|
||||||
case SHADER_OPCODE_MOV_INDIRECT:
|
|
||||||
/* The source and destination types of these may be hard-coded to
|
|
||||||
* integer at codegen time due to hardware limitations of 64-bit
|
|
||||||
* types.
|
|
||||||
*/
|
|
||||||
return ((devinfo->ver == 7 && !devinfo->is_haswell) ||
|
|
||||||
devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) &&
|
|
||||||
type_sz(inst->src[0].type) > 4 &&
|
|
||||||
inst->dst.type != inst->src[0].type;
|
|
||||||
default:
|
default:
|
||||||
/* FIXME: We assume the opcodes don't explicitly mentioned before
|
/* FIXME: We assume the opcodes not explicitly mentioned before just
|
||||||
* just work fine with arbitrary conversions.
|
* work fine with arbitrary conversions, unless they need to be
|
||||||
|
* bit-cast.
|
||||||
*/
|
*/
|
||||||
return false;
|
return has_invalid_exec_type(devinfo, inst) &&
|
||||||
|
inst->dst.type != get_exec_type(inst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return whether the instruction has unsupported destination modifiers.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
has_invalid_dst_modifiers(const gen_device_info *devinfo, const fs_inst *inst)
|
||||||
|
{
|
||||||
|
return (has_invalid_exec_type(devinfo, inst) &&
|
||||||
|
(inst->saturate || inst->conditional_mod)) ||
|
||||||
|
has_invalid_conversion(devinfo, inst);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return whether the instruction has non-standard semantics for the
|
* Return whether the instruction has non-standard semantics for the
|
||||||
* conditional mod which don't cause the flag register to be updated with
|
* conditional mod which don't cause the flag register to be updated with
|
||||||
|
@ -423,6 +457,30 @@ namespace {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bit-cast sources and destination of the instruction to an appropriate
|
||||||
|
* integer type, to be used in cases where the instruction doesn't support
|
||||||
|
* some other execution type.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
lower_exec_type(fs_visitor *v, bblock_t *block, fs_inst *inst)
|
||||||
|
{
|
||||||
|
assert(inst->dst.type == get_exec_type(inst));
|
||||||
|
const unsigned mask = has_invalid_exec_type(v->devinfo, inst);
|
||||||
|
const brw_reg_type raw_type = brw_int_type(type_sz(inst->dst.type), false);
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < inst->sources; i++) {
|
||||||
|
if (mask & (1u << i)) {
|
||||||
|
assert(inst->src[i].type == inst->dst.type);
|
||||||
|
inst->src[i].type = raw_type;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inst->dst.type = raw_type;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Legalize the source and destination regioning controls of the specified
|
* Legalize the source and destination regioning controls of the specified
|
||||||
* instruction.
|
* instruction.
|
||||||
|
@ -433,7 +491,7 @@ namespace {
|
||||||
const gen_device_info *devinfo = v->devinfo;
|
const gen_device_info *devinfo = v->devinfo;
|
||||||
bool progress = false;
|
bool progress = false;
|
||||||
|
|
||||||
if (has_invalid_conversion(devinfo, inst))
|
if (has_invalid_dst_modifiers(devinfo, inst))
|
||||||
progress |= lower_dst_modifiers(v, block, inst);
|
progress |= lower_dst_modifiers(v, block, inst);
|
||||||
|
|
||||||
if (has_invalid_dst_region(devinfo, inst))
|
if (has_invalid_dst_region(devinfo, inst))
|
||||||
|
@ -447,6 +505,9 @@ namespace {
|
||||||
progress |= lower_src_region(v, block, inst, i);
|
progress |= lower_src_region(v, block, inst, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (has_invalid_exec_type(devinfo, inst))
|
||||||
|
progress |= lower_exec_type(v, block, inst);
|
||||||
|
|
||||||
return progress;
|
return progress;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -570,7 +570,12 @@ has_dst_aligned_region_restriction(const gen_device_info *devinfo,
|
||||||
|
|
||||||
if (type_sz(dst_type) > 4 || type_sz(exec_type) > 4 ||
|
if (type_sz(dst_type) > 4 || type_sz(exec_type) > 4 ||
|
||||||
(type_sz(exec_type) == 4 && is_dword_multiply))
|
(type_sz(exec_type) == 4 && is_dword_multiply))
|
||||||
return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo);
|
return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo) ||
|
||||||
|
devinfo->verx10 >= 125;
|
||||||
|
|
||||||
|
else if (brw_reg_type_is_floating_point(inst->dst.type))
|
||||||
|
return devinfo->verx10 >= 125;
|
||||||
|
|
||||||
else
|
else
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue