pan/bi: Fuse result types

In NIR, comparison instructions always produce 0/~0 results. For other result
types, a separate b2f32 or b2i32 instruction is used to transform the result.
However, Mali's comparison instructions have modifiers for these alternate
result types, so we can implement expressions like int(a < b) and float(a ==
b) in a single instruction. Add a peephole optimization to fuse comparisons
with result type transformations.

Results on Mali-G52:

total instructions in shared programs: 2439696 -> 2434339 (-0.22%)
instructions in affected programs: 418703 -> 413346 (-1.28%)
helped: 1630
HURT: 0
helped stats (abs) min: 1.0 max: 28.0 x̄: 3.29 x̃: 2
helped stats (rel) min: 0.11% max: 19.35% x̄: 1.64% x̃: 1.39%
95% mean confidence interval for instructions value: -3.44 -3.13
95% mean confidence interval for instructions %-change: -1.72% -1.56%
Instructions are helped.

total tuples in shared programs: 1946581 -> 1943005 (-0.18%)
tuples in affected programs: 251742 -> 248166 (-1.42%)
helped: 1113
HURT: 11
helped stats (abs) min: 1.0 max: 32.0 x̄: 3.23 x̃: 2
helped stats (rel) min: 0.17% max: 15.38% x̄: 1.80% x̃: 1.38%
HURT stats (abs)   min: 1.0 max: 2.0 x̄: 1.45 x̃: 1
HURT stats (rel)   min: 0.21% max: 3.12% x̄: 1.23% x̃: 0.89%
95% mean confidence interval for tuples value: -3.35 -3.01
95% mean confidence interval for tuples %-change: -1.88% -1.66%
Tuples are helped.

total clauses in shared programs: 357791 -> 357349 (-0.12%)
clauses in affected programs: 15879 -> 15437 (-2.78%)
helped: 371
HURT: 3
helped stats (abs) min: 1.0 max: 8.0 x̄: 1.20 x̃: 1
helped stats (rel) min: 0.80% max: 33.33% x̄: 3.85% x̃: 2.17%
HURT stats (abs)   min: 1.0 max: 1.0 x̄: 1.00 x̃: 1
HURT stats (rel)   min: 2.94% max: 5.26% x̄: 4.49% x̃: 5.26%
95% mean confidence interval for clauses value: -1.27 -1.09
95% mean confidence interval for clauses %-change: -4.21% -3.36%
Clauses are helped.

total cycles in shared programs: 167922.04 -> 167810.71 (-0.07%)
cycles in affected programs: 6772.08 -> 6660.75 (-1.64%)
helped: 655
HURT: 12
helped stats (abs) min: 0.041665999999999315 max: 1.3333319999999986 x̄: 0.17 x̃: 0
helped stats (rel) min: 0.18% max: 20.00% x̄: 2.02% x̃: 1.60%
HURT stats (abs)   min: 0.041665999999999315 max: 0.125 x̄: 0.05 x̃: 0
HURT stats (rel)   min: 0.21% max: 3.80% x̄: 1.23% x̃: 0.88%
95% mean confidence interval for cycles value: -0.18 -0.16
95% mean confidence interval for cycles %-change: -2.10% -1.81%
Cycles are helped.

total arith in shared programs: 74393.17 -> 74243.08 (-0.20%)
arith in affected programs: 10157.50 -> 10007.42 (-1.48%)
helped: 1129
HURT: 12
helped stats (abs) min: 0.041665999999999315 max: 1.3333319999999986 x̄: 0.13 x̃: 0
helped stats (rel) min: 0.18% max: 50.00% x̄: 1.94% x̃: 1.40%
HURT stats (abs)   min: 0.041665999999999315 max: 0.125 x̄: 0.05 x̃: 0
HURT stats (rel)   min: 0.21% max: 3.80% x̄: 1.23% x̃: 0.88%
95% mean confidence interval for arith value: -0.14 -0.12
95% mean confidence interval for arith %-change: -2.06% -1.76%
Arith are helped.

total quadwords in shared programs: 1692019 -> 1688164 (-0.23%)
quadwords in affected programs: 216669 -> 212814 (-1.78%)
helped: 1148
HURT: 11
helped stats (abs) min: 1.0 max: 41.0 x̄: 3.37 x̃: 2
helped stats (rel) min: 0.17% max: 17.24% x̄: 2.25% x̃: 1.73%
HURT stats (abs)   min: 1.0 max: 2.0 x̄: 1.09 x̃: 1
HURT stats (rel)   min: 0.60% max: 1.32% x̄: 0.85% x̃: 0.83%
95% mean confidence interval for quadwords value: -3.49 -3.16
95% mean confidence interval for quadwords %-change: -2.33% -2.10%
Quadwords are helped.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16725>
This commit is contained in:
Alyssa Rosenzweig 2022-01-06 17:12:56 -05:00 committed by Marge Bot
parent 112a856813
commit 501a66cb5c
1 changed files with 79 additions and 1 deletions

View File

@ -228,6 +228,83 @@ bi_optimizer_clamp(bi_instr *I, bi_instr *use)
return true;
}
/*
 * Map a lane size in bits (8, 16 or 32) to the MUX opcode operating at that
 * size. Any other size is treated as unreachable.
 */
static enum bi_opcode
bi_sized_mux_op(unsigned size)
{
   if (size == 8)
      return BI_OPCODE_MUX_V4I8;

   if (size == 16)
      return BI_OPCODE_MUX_V2I16;

   if (size == 32)
      return BI_OPCODE_MUX_I32;

   unreachable("invalid size");
}
/*
 * Check whether I is a MUX of the given lane size (in bits) whose first
 * source is value-equivalent to zero and whose second source is
 * value-equivalent to v1.
 *
 * Only src[0] and src[1] are inspected; any further operands of the MUX are
 * not examined here.
 */
static bool
bi_is_fixed_mux(bi_instr *I, unsigned size, bi_index v1)
{
   /* Wrong opcode for this size: not the MUX we are looking for */
   if (I->op != bi_sized_mux_op(size))
      return false;

   /* The first source must be the constant zero */
   if (!bi_is_value_equiv(I->src[0], bi_zero()))
      return false;

   /* The second source must match the requested value */
   return bi_is_value_equiv(I->src[1], v1);
}
static bool
bi_takes_int_result_type(enum bi_opcode op)
{
switch (op) {
case BI_OPCODE_ICMP_I32:
case BI_OPCODE_ICMP_S32:
case BI_OPCODE_ICMP_U32:
case BI_OPCODE_ICMP_V2I16:
case BI_OPCODE_ICMP_V2S16:
case BI_OPCODE_ICMP_V2U16:
case BI_OPCODE_ICMP_V4I8:
case BI_OPCODE_ICMP_V4S8:
case BI_OPCODE_ICMP_V4U8:
case BI_OPCODE_FCMP_F32:
case BI_OPCODE_FCMP_V2F16:
return true;
default:
return false;
}
}
/*
 * Return true if op is one of the comparison opcodes that may be given a
 * floating-point result type. Only the FCMP variants qualify.
 */
static bool
bi_takes_float_result_type(enum bi_opcode op)
{
   switch (op) {
   case BI_OPCODE_FCMP_F32:
   case BI_OPCODE_FCMP_V2F16:
      return true;
   default:
      return false;
   }
}
/*
 * CMP+MUX -> CMP with result type
 *
 * Try to fold a MUX that selects between zero and the constant one (float or
 * integer) into the instruction I by setting I's result type. On success, I
 * takes over the MUX's destination and true is returned; otherwise I is left
 * untouched and false is returned.
 *
 * NOTE(review): only src[0] and src[1] of the mux are inspected (via
 * bi_is_fixed_mux); the selector operand and mux mode are presumably
 * guaranteed by the caller -- confirm.
 */
static bool
bi_optimizer_result_type(bi_instr *I, bi_instr *mux)
{
   /* Both instructions must operate at the same size */
   if (bi_opcode_props[I->op].size != bi_opcode_props[mux->op].size)
      return false;

   bool picks_float_one = bi_is_fixed_mux(mux, 32, bi_imm_f32(1.0)) ||
                          bi_is_fixed_mux(mux, 16, bi_imm_f16(1.0));

   if (picks_float_one) {
      /* 0.0 / 1.0 result: needs an opcode accepting float result types */
      if (!bi_takes_float_result_type(I->op))
         return false;

      I->result_type = BI_RESULT_TYPE_F1;
   } else {
      bool picks_int_one = bi_is_fixed_mux(mux, 32, bi_imm_u32(1)) ||
                           bi_is_fixed_mux(mux, 16, bi_imm_u16(1)) ||
                           bi_is_fixed_mux(mux, 8, bi_imm_u8(1));

      /* 0 / 1 result: needs an opcode accepting integer result types */
      if (!picks_int_one || !bi_takes_int_result_type(I->op))
         return false;

      I->result_type = BI_RESULT_TYPE_I1;
   }

   /* I now produces what the MUX produced, so write to the MUX's destination */
   I->dest[0] = mux->dest[0];
   return true;
}
static bool
bi_is_var_tex(bi_instr *var, bi_instr *tex)
{
@ -289,7 +366,8 @@ bi_opt_mod_prop_backward(bi_context *ctx)
/* Destination has a single use, try to propagate */
bool propagated =
bi_optimizer_clamp(I, use);
bi_optimizer_clamp(I, use) ||
bi_optimizer_result_type(I, use);
if (!propagated && I->op == BI_OPCODE_LD_VAR_IMM && use->op == BI_OPCODE_SPLIT_I32) {
/* Need to see through the split in a