nir/lower_locals_to_regs: Do an ad-hoc copy propagate on our generated MOV.
I noticed the inefficiency in NIR-to-TGSI output while trying to debug a failure handling some arrays in r600. While this makes reading CTS shaders easier, the effect in the real world is pretty limited. From softpipe shader-db: total instructions in shared programs: 2929840 -> 2929836 (<.01%) instructions in affected programs: 118 -> 114 (-3.39%) Reviewed-by: Matt Turner <mattst88@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14321>
This commit is contained in:
parent
ef325d4650
commit
61400f8a2d
|
@ -227,7 +227,27 @@ lower_locals_to_regs_block(nir_block *block,
|
|||
nir_src reg_src = get_deref_reg_src(deref, state);
|
||||
|
||||
nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
|
||||
|
||||
nir_src_copy(&mov->src[0].src, &intrin->src[1]);
|
||||
|
||||
/* The normal NIR SSA copy propagate pass can't happen after this pass,
|
||||
* so do an ad-hoc copy propagate since this ALU op can do swizzles
|
||||
* while the deref couldn't.
|
||||
*/
|
||||
if (mov->src[0].src.is_ssa) {
|
||||
nir_instr *parent = mov->src[0].src.ssa->parent_instr;
|
||||
if (parent->type == nir_instr_type_alu) {
|
||||
nir_alu_instr *parent_alu = nir_instr_as_alu(parent);
|
||||
if (parent_alu->op == nir_op_mov && parent_alu->src[0].src.is_ssa) {
|
||||
for (unsigned i = 0; i < intrin->num_components; i++)
|
||||
mov->src[0].swizzle[i] = parent_alu->src[0].swizzle[mov->src[0].swizzle[i]];
|
||||
mov->src[0].abs = parent_alu->src[0].abs;
|
||||
mov->src[0].negate = parent_alu->src[0].negate;
|
||||
mov->src[0].src = parent_alu->src[0].src;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mov->dest.write_mask = nir_intrinsic_write_mask(intrin);
|
||||
mov->dest.dest.is_ssa = false;
|
||||
mov->dest.dest.reg.reg = reg_src.reg.reg;
|
||||
|
|
Loading…
Reference in New Issue