v3d: Enable the late algebraic optimizations to get real subs.

This worked better than my original v3d-local pass for just subs, and is a
huge win over not producing subs.

total instructions in shared programs: 6408469 -> 6167932 (-3.75%)
total threads in shared programs: 153784 -> 154104 (0.21%)
total uniforms in shared programs: 2157078 -> 1905823 (-11.65%)
total max-temps in shared programs: 904546 -> 895796 (-0.97%)
total spills in shared programs: 4959 -> 4993 (0.69%)
total fills in shared programs: 6558 -> 6670 (1.71%)
total sfu-stalls in shared programs: 25845 -> 25175 (-2.59%)
total inst-and-stalls in shared programs: 6434314 -> 6193107 (-3.75%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
This commit is contained in:
Eric Anholt 2019-09-25 11:56:06 -07:00 committed by Daniel Schürmann
parent 1d29895e5b
commit ca1aa5d225
1 changed file with 16 additions and 0 deletions

View File

@@ -939,6 +939,22 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
NIR_PASS_V(c->s, nir_lower_idiv);
v3d_optimize_nir(c->s);
/* Do late algebraic optimization to turn add(a, neg(b)) back into
* subs, then the mandatory cleanup after algebraic. Note that it may
* produce fnegs, and if so then we need to keep running to squash
* fneg(fneg(a)).
*/
bool more_late_algebraic = true;
while (more_late_algebraic) {
more_late_algebraic = false;
NIR_PASS(more_late_algebraic, c->s, nir_opt_algebraic_late);
NIR_PASS_V(c->s, nir_opt_constant_folding);
NIR_PASS_V(c->s, nir_copy_prop);
NIR_PASS_V(c->s, nir_opt_dce);
NIR_PASS_V(c->s, nir_opt_cse);
}
NIR_PASS_V(c->s, nir_lower_bool_to_int32);
NIR_PASS_V(c->s, nir_convert_from_ssa, true);