pan/bi: Fuse result types
In NIR, comparison instructions always produce 0/~0 results. For other result types, a separate b2f32 or b2i32 instruction is used to transform the result. However, Mali's comparison instructions have modifiers for these alternate result types, so we can implement expressions like int(a < b) and float(a == b) in a single instruction. Add a peephole optimization to fuse comparisons with result type transformations. Results on Mali-G52: total instructions in shared programs: 2439696 -> 2434339 (-0.22%) instructions in affected programs: 418703 -> 413346 (-1.28%) helped: 1630 HURT: 0 helped stats (abs) min: 1.0 max: 28.0 x̄: 3.29 x̃: 2 helped stats (rel) min: 0.11% max: 19.35% x̄: 1.64% x̃: 1.39% 95% mean confidence interval for instructions value: -3.44 -3.13 95% mean confidence interval for instructions %-change: -1.72% -1.56% Instructions are helped. total tuples in shared programs: 1946581 -> 1943005 (-0.18%) tuples in affected programs: 251742 -> 248166 (-1.42%) helped: 1113 HURT: 11 helped stats (abs) min: 1.0 max: 32.0 x̄: 3.23 x̃: 2 helped stats (rel) min: 0.17% max: 15.38% x̄: 1.80% x̃: 1.38% HURT stats (abs) min: 1.0 max: 2.0 x̄: 1.45 x̃: 1 HURT stats (rel) min: 0.21% max: 3.12% x̄: 1.23% x̃: 0.89% 95% mean confidence interval for tuples value: -3.35 -3.01 95% mean confidence interval for tuples %-change: -1.88% -1.66% Tuples are helped. total clauses in shared programs: 357791 -> 357349 (-0.12%) clauses in affected programs: 15879 -> 15437 (-2.78%) helped: 371 HURT: 3 helped stats (abs) min: 1.0 max: 8.0 x̄: 1.20 x̃: 1 helped stats (rel) min: 0.80% max: 33.33% x̄: 3.85% x̃: 2.17% HURT stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1 HURT stats (rel) min: 2.94% max: 5.26% x̄: 4.49% x̃: 5.26% 95% mean confidence interval for clauses value: -1.27 -1.09 95% mean confidence interval for clauses %-change: -4.21% -3.36% Clauses are helped. 
total cycles in shared programs: 167922.04 -> 167810.71 (-0.07%) cycles in affected programs: 6772.08 -> 6660.75 (-1.64%) helped: 655 HURT: 12 helped stats (abs) min: 0.041665999999999315 max: 1.3333319999999986 x̄: 0.17 x̃: 0 helped stats (rel) min: 0.18% max: 20.00% x̄: 2.02% x̃: 1.60% HURT stats (abs) min: 0.041665999999999315 max: 0.125 x̄: 0.05 x̃: 0 HURT stats (rel) min: 0.21% max: 3.80% x̄: 1.23% x̃: 0.88% 95% mean confidence interval for cycles value: -0.18 -0.16 95% mean confidence interval for cycles %-change: -2.10% -1.81% Cycles are helped. total arith in shared programs: 74393.17 -> 74243.08 (-0.20%) arith in affected programs: 10157.50 -> 10007.42 (-1.48%) helped: 1129 HURT: 12 helped stats (abs) min: 0.041665999999999315 max: 1.3333319999999986 x̄: 0.13 x̃: 0 helped stats (rel) min: 0.18% max: 50.00% x̄: 1.94% x̃: 1.40% HURT stats (abs) min: 0.041665999999999315 max: 0.125 x̄: 0.05 x̃: 0 HURT stats (rel) min: 0.21% max: 3.80% x̄: 1.23% x̃: 0.88% 95% mean confidence interval for arith value: -0.14 -0.12 95% mean confidence interval for arith %-change: -2.06% -1.76% Arith are helped. total quadwords in shared programs: 1692019 -> 1688164 (-0.23%) quadwords in affected programs: 216669 -> 212814 (-1.78%) helped: 1148 HURT: 11 helped stats (abs) min: 1.0 max: 41.0 x̄: 3.37 x̃: 2 helped stats (rel) min: 0.17% max: 17.24% x̄: 2.25% x̃: 1.73% HURT stats (abs) min: 1.0 max: 2.0 x̄: 1.09 x̃: 1 HURT stats (rel) min: 0.60% max: 1.32% x̄: 0.85% x̃: 0.83% 95% mean confidence interval for quadwords value: -3.49 -3.16 95% mean confidence interval for quadwords %-change: -2.33% -2.10% Quadwords are helped. Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16725>
This commit is contained in:
parent
112a856813
commit
501a66cb5c
|
@ -228,6 +228,83 @@ bi_optimizer_clamp(bi_instr *I, bi_instr *use)
|
|||
return true;
|
||||
}
|
||||
|
||||
static enum bi_opcode
|
||||
bi_sized_mux_op(unsigned size)
|
||||
{
|
||||
switch (size) {
|
||||
case 8: return BI_OPCODE_MUX_V4I8;
|
||||
case 16: return BI_OPCODE_MUX_V2I16;
|
||||
case 32: return BI_OPCODE_MUX_I32;
|
||||
default: unreachable("invalid size");
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
bi_is_fixed_mux(bi_instr *I, unsigned size, bi_index v1)
|
||||
{
|
||||
return I->op == bi_sized_mux_op(size) &&
|
||||
bi_is_value_equiv(I->src[0], bi_zero()) &&
|
||||
bi_is_value_equiv(I->src[1], v1);
|
||||
}
|
||||
|
||||
static bool
|
||||
bi_takes_int_result_type(enum bi_opcode op)
|
||||
{
|
||||
switch (op) {
|
||||
case BI_OPCODE_ICMP_I32:
|
||||
case BI_OPCODE_ICMP_S32:
|
||||
case BI_OPCODE_ICMP_U32:
|
||||
case BI_OPCODE_ICMP_V2I16:
|
||||
case BI_OPCODE_ICMP_V2S16:
|
||||
case BI_OPCODE_ICMP_V2U16:
|
||||
case BI_OPCODE_ICMP_V4I8:
|
||||
case BI_OPCODE_ICMP_V4S8:
|
||||
case BI_OPCODE_ICMP_V4U8:
|
||||
case BI_OPCODE_FCMP_F32:
|
||||
case BI_OPCODE_FCMP_V2F16:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
bi_takes_float_result_type(enum bi_opcode op)
|
||||
{
|
||||
return (op == BI_OPCODE_FCMP_F32) ||
|
||||
(op == BI_OPCODE_FCMP_V2F16);
|
||||
}
|
||||
|
||||
/* CMP+MUX -> CMP with result type */
|
||||
static bool
|
||||
bi_optimizer_result_type(bi_instr *I, bi_instr *mux)
|
||||
{
|
||||
if (bi_opcode_props[I->op].size != bi_opcode_props[mux->op].size)
|
||||
return false;
|
||||
|
||||
if (bi_is_fixed_mux(mux, 32, bi_imm_f32(1.0)) ||
|
||||
bi_is_fixed_mux(mux, 16, bi_imm_f16(1.0))) {
|
||||
|
||||
if (!bi_takes_float_result_type(I->op))
|
||||
return false;
|
||||
|
||||
I->result_type = BI_RESULT_TYPE_F1;
|
||||
} else if (bi_is_fixed_mux(mux, 32, bi_imm_u32(1)) ||
|
||||
bi_is_fixed_mux(mux, 16, bi_imm_u16(1)) ||
|
||||
bi_is_fixed_mux(mux, 8, bi_imm_u8(1))) {
|
||||
|
||||
if (!bi_takes_int_result_type(I->op))
|
||||
return false;
|
||||
|
||||
I->result_type = BI_RESULT_TYPE_I1;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
I->dest[0] = mux->dest[0];
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
bi_is_var_tex(bi_instr *var, bi_instr *tex)
|
||||
{
|
||||
|
@ -289,7 +366,8 @@ bi_opt_mod_prop_backward(bi_context *ctx)
|
|||
|
||||
/* Destination has a single use, try to propagate */
|
||||
bool propagated =
|
||||
bi_optimizer_clamp(I, use);
|
||||
bi_optimizer_clamp(I, use) ||
|
||||
bi_optimizer_result_type(I, use);
|
||||
|
||||
if (!propagated && I->op == BI_OPCODE_LD_VAR_IMM && use->op == BI_OPCODE_SPLIT_I32) {
|
||||
/* Need to see through the split in a
|
||||
|
|
Loading…
Reference in New Issue