i965/fs: generalize the legalization d2x pass

Generalize it to lower any unsupported narrower conversion.

v2 (Curro):
- Add supports_type_conversion()
- Reuse existing instruction instead of cloning it.
- Generalize d2x to narrower and equal size conversions.

v3 (Curro):
- Make supports_type_conversion() const and improve it.
- Use foreach_block_and_inst to process added instructions.
- Simplify code.
- Add assert and improve comments.
- Remove redundant mov.
- Remove useless comment.
- Remove saturate == false assert and add support for saturation
  when fixing the conversion.
- Add get_exec_type() function.

v4 (Curro):
- Use get_exec_type() function to get sources' type.

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
This commit is contained in:
Samuel Iglesias Gonsálvez 2017-01-20 08:47:05 +01:00 committed by Francisco Jerez
parent 94ffeb7fa2
commit aeecc82d05
2 changed files with 69 additions and 39 deletions

View File

@ -5694,11 +5694,6 @@ fs_visitor::optimize()
OPT(dead_code_eliminate);
}
if (OPT(lower_d2x)) {
OPT(opt_copy_propagation);
OPT(dead_code_eliminate);
}
OPT(lower_simd_width);
/* After SIMD lowering just in case we had to unroll the EOT send. */
@ -5745,6 +5740,12 @@ fs_visitor::optimize()
OPT(dead_code_eliminate);
}
if (OPT(lower_d2x)) {
OPT(opt_copy_propagation);
OPT(dead_code_eliminate);
OPT(lower_simd_width);
}
lower_uniform_pull_constant_loads();
validate();

View File

@ -27,48 +27,77 @@
using namespace brw;
/**
 * Report whether \p inst can convert from its execution type to its
 * destination type on its own.
 *
 * Returns false only for a SEL whose destination type differs from the
 * type returned by get_exec_type(); every other opcode returns true.
 */
static bool
supports_type_conversion(const fs_inst *inst) {
   /* SEL is the only opcode restricted here: it qualifies only when its
    * destination type already matches its execution type.
    */
   if (inst->opcode == BRW_OPCODE_SEL)
      return inst->dst.type == get_exec_type(inst);

   /* BRW_OPCODE_MOV and SHADER_OPCODE_MOV_INDIRECT are accepted
    * unconditionally.
    *
    * FIXME: Every remaining opcode is assumed to work fine with arbitrary
    * conversions as well.
    */
   return true;
}
bool
fs_visitor::lower_d2x()
{
bool progress = false;
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
if (inst->opcode != BRW_OPCODE_MOV)
continue;
if (inst->dst.type != BRW_REGISTER_TYPE_F &&
inst->dst.type != BRW_REGISTER_TYPE_D &&
inst->dst.type != BRW_REGISTER_TYPE_UD)
continue;
if (inst->src[0].type != BRW_REGISTER_TYPE_DF &&
inst->src[0].type != BRW_REGISTER_TYPE_UQ &&
inst->src[0].type != BRW_REGISTER_TYPE_Q)
continue;
assert(inst->dst.file == VGRF);
assert(inst->saturate == false);
fs_reg dst = inst->dst;
foreach_block_and_inst(block, fs_inst, inst, cfg) {
const fs_builder ibld(this, block, inst);
fs_reg dst = inst->dst;
bool saturate = inst->saturate;
/* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
* Single Precision Float":
*
* The upper Dword of every Qword will be written with undefined
* value when converting DF to F.
*
* So we need to allocate a temporary that's two registers, and then do
* a strided MOV to get the lower DWord of every Qword that has the
* result.
*/
fs_reg temp = ibld.vgrf(inst->src[0].type, 1);
fs_reg strided_temp = subscript(temp, inst->dst.type, 0);
ibld.MOV(strided_temp, inst->src[0]);
ibld.MOV(dst, strided_temp);
if (supports_type_conversion(inst)) {
if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < 8) {
/* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
* Single Precision Float":
*
* The upper Dword of every Qword will be written with undefined
* value when converting DF to F.
*
* So we need to allocate a temporary that's two registers, and then do
* a strided MOV to get the lower DWord of every Qword that has the
* result.
*/
fs_reg temp = ibld.vgrf(get_exec_type(inst));
fs_reg strided_temp = subscript(temp, dst.type, 0);
inst->remove(block);
progress = true;
assert(inst->size_written == inst->dst.component_size(inst->exec_size));
inst->dst = strided_temp;
inst->saturate = false;
/* The destination is now strided, so the instruction writes n times
* more data, where n is the size ratio between the source and
* destination types. Update size_written accordingly.
*/
inst->size_written = inst->dst.component_size(inst->exec_size);
ibld.at(block, inst->next).MOV(dst, strided_temp)->saturate = saturate;
progress = true;
}
} else {
fs_reg temp0 = ibld.vgrf(get_exec_type(inst));
assert(inst->size_written == inst->dst.component_size(inst->exec_size));
inst->dst = temp0;
/* The instruction now writes to a temporary of the execution type,
* whose size may differ from that of the original destination type.
* Update size_written accordingly.
*/
inst->size_written = inst->dst.component_size(inst->exec_size);
inst->saturate = false;
/* Now, do the conversion to original destination's type. In next iteration,
* we will lower it if it is a d2f conversion.
*/
ibld.at(block, inst->next).MOV(dst, temp0)->saturate = saturate;
progress = true;
}
}
if (progress)