Revert "i965/fs: Merge CMP and SEL into CSEL on Gen8+"

This reverts commit 52c7df1643.  The pass,
while clearly useful for some shaders, has at least three bugs that I
was able to find fairly quickly:

 1. It doesn't work for type-converting MOVs because f > 0 is not the
    same as f2i(f) > 0

 2. CSEL is a 3src instruction and only supports one source type; it
    doesn't take this into account and tries to create instructions
    which do a F compare and a D select.  This is especially nasty to
    debug because you don't see that in the dumped assembly because we
    don't properly assert that types are the same in codegen.

 3. While you can handle 2, in theory, by reinterpreting types, you
    can't do that in the presence of source modifiers.  This pass
    doesn't even attempt to detect that.

Those are just the ones I found with the one almost trivial shader I was
debugging.  There are very likely more.  Best thing to do for now
is just shut it off until someone has the time to figure out how to do
this properly and write tests to ensure it's correct.

Fixes: 52c7df1643 "i965/fs: Merge CMP and SEL into CSEL on Gen8+"
Reviewed-by: Brian Paul <brianp@vmware.com>
This commit is contained in:
Jason Ekstrand 2019-11-01 14:22:35 -05:00
parent 8d7621a53f
commit 2fca325ea6
2 changed files with 0 additions and 108 deletions

View File

@ -3142,107 +3142,6 @@ mask_relative_to(const fs_reg &r, const fs_reg &s, unsigned ds)
return ((1 << n) - 1) << shift;
}
/**
 * Fuse a flag-writing float comparison against zero with the predicated SEL
 * that consumes the flag into a single CSEL.  Only runs on Gen8+.
 *
 * CSEL is a 3-source instruction: it cannot take an immediate and it has a
 * single source type that is shared by the comparison and the selection.
 * The pass is therefore deliberately conservative and only fires when
 *
 *  - the SEL destination and both SEL sources are F, so nothing has to be
 *    reinterpreted as float (reinterpreting D/UD data is not valid once
 *    source modifiers or saturate are involved), and
 *
 *  - the flag producer is either a CMP of an F source against zero or an
 *    F-to-F MOV with a conditional modifier.  For a type-converting MOV the
 *    flag describes the converted value, and f > 0 is not f2i(f) > 0.
 *
 * Skipping a candidate is always safe; the CMP/SEL pair is simply kept.
 *
 * NOTE(review): nothing below checks that the flag producer's source is
 * still unmodified when the SEL executes, nor that the producer and the SEL
 * use the same flag register and execution size.  Confirm that earlier
 * passes guarantee this before relying on the pass.
 *
 * \return true if at least one SEL was replaced by a CSEL.
 */
bool
fs_visitor::opt_peephole_csel()
{
   if (devinfo->gen < 8)
      return false;

   bool progress = false;

   foreach_block_reverse(block, cfg) {
      foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
         if (inst->opcode != BRW_OPCODE_SEL ||
             inst->predicate != BRW_PREDICATE_NORMAL)
            continue;

         /* The fused instruction compares an F value, and CSEL has only one
          * source type, so the select has to be an F select as well.  With
          * every operand already F, source modifiers and saturate keep their
          * meaning when they are carried over to the CSEL.
          */
         if (inst->dst.type != BRW_REGISTER_TYPE_F ||
             inst->src[0].type != BRW_REGISTER_TYPE_F ||
             inst->src[1].type != BRW_REGISTER_TYPE_F)
            continue;

         /* Because it is a 3-src instruction, CSEL cannot have an immediate
          * value as a source, but we can sometimes handle zero.
          */
         if ((inst->src[0].file != VGRF && inst->src[0].file != ATTR &&
              inst->src[0].file != UNIFORM) ||
             (inst->src[1].file != VGRF && inst->src[1].file != ATTR &&
              inst->src[1].file != UNIFORM && !inst->src[1].is_zero()))
            continue;

         /* Walk backwards to the closest instruction that writes the flag;
          * it decides whether this SEL can be fused.
          */
         foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
            if (!scan_inst->flags_written())
               continue;

            if ((scan_inst->opcode != BRW_OPCODE_CMP &&
                 scan_inst->opcode != BRW_OPCODE_MOV) ||
                scan_inst->predicate != BRW_PREDICATE_NONE ||
                (scan_inst->src[0].file != VGRF &&
                 scan_inst->src[0].file != ATTR &&
                 scan_inst->src[0].file != UNIFORM) ||
                scan_inst->src[0].type != BRW_REGISTER_TYPE_F)
               break;

            /* CSEL compares its third source against zero, nothing else. */
            if (scan_inst->opcode == BRW_OPCODE_CMP &&
                !scan_inst->src[1].is_zero())
               break;

            /* A converting MOV sets the flag from the converted value, which
             * is not what a CSEL comparing the unconverted source computes.
             */
            if (scan_inst->opcode == BRW_OPCODE_MOV &&
                scan_inst->dst.type != scan_inst->src[0].type)
               break;

            const brw::fs_builder ibld(this, block, inst);

            /* An inverted predicate on the SEL is folded into the condition. */
            const enum brw_conditional_mod cond =
               inst->predicate_inverse
               ? brw_negate_cmod(scan_inst->conditional_mod)
               : scan_inst->conditional_mod;

            fs_inst *csel_inst = NULL;

            if (inst->src[1].file != IMM) {
               csel_inst = ibld.CSEL(inst->dst,
                                     inst->src[0],
                                     inst->src[1],
                                     scan_inst->src[0],
                                     cond);
            } else if (cond == BRW_CONDITIONAL_NZ) {
               /* Consider the sequence
                *
                * cmp.nz.f0  null<1>F   g3<8,8,1>F   0F
                * (+f0) sel  g124<1>F   g2<8,8,1>F   0F
                *
                * The sel picks the immediate value 0 exactly when g3 is
                * ±0.0.  Therefore, this sequence is equivalent:
                *
                * cmp.nz.f0  null<1>F   g3<8,8,1>F   0F
                * (+f0) sel  g124<1>F   g2<8,8,1>F   (abs)g3<8,8,1>F
                *
                * The abs ensures that the result is +0.0F when g3 is -0.0F.
                * By normal cmp-sel merging, this is also equivalent:
                *
                * csel.nz    g124<1>F   g2<4,4,1>F   (abs)g3<4,4,1>F   g3<4,4,1>F
                */
               csel_inst = ibld.CSEL(inst->dst,
                                     inst->src[0],
                                     scan_inst->src[0],
                                     scan_inst->src[0],
                                     cond);
               csel_inst->src[1].abs = true;
               /* A negate inherited from the compare source would turn the
                * selected |g3| into -|g3|, i.e. -0.0F instead of +0.0F.
                */
               csel_inst->src[1].negate = false;
            }

            if (csel_inst != NULL) {
               progress = true;
               csel_inst->saturate = inst->saturate;
               inst->remove(block);
            }

            break;
         }
      }
   }

   return progress;
}
bool
fs_visitor::compute_to_mrf()
{
@ -7396,12 +7295,6 @@ fs_visitor::optimize()
OPT(compact_virtual_grfs);
} while (progress);
/* Do this after cmod propagation has had every possible opportunity to
* propagate results into SEL instructions.
*/
if (OPT(opt_peephole_csel))
OPT(dead_code_eliminate);
progress = false;
pass_num = 0;

View File

@ -190,7 +190,6 @@ public:
fs_reg result, fs_reg *op, unsigned fsign_src);
void emit_shader_float_controls_execution_mode();
bool opt_peephole_sel();
bool opt_peephole_csel();
bool opt_peephole_predicated_break();
bool opt_saturate_propagation();
bool opt_cmod_propagation();