aco: use a single instruction for uadd32_sat() on GFX8

fossil-db (GFX8):
Totals from 8 (0.01% of 147787) affected shaders:
SGPRs: 352 -> 368 (+4.55%)
CodeSize: 49576 -> 48788 (-1.59%)
Instrs: 9487 -> 9318 (-1.78%)
Latency: 49935 -> 49607 (-0.66%)
InvThroughput: 138493 -> 137443 (-0.76%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9598>
This commit is contained in:
Rhys Perry 2021-03-15 13:35:54 +00:00
parent 3decb52c82
commit 5bc100eb2d
1 changed file with 10 additions and 5 deletions

View File

@@ -1230,13 +1230,18 @@ Temp emit_floor_f64(isel_context *ctx, Builder& bld, Definition dst, Temp val)
 Temp uadd32_sat(Builder& bld, Definition dst, Temp src0, Temp src1)
 {
-   if (bld.program->chip_class >= GFX9) {
-      Builder::Result add = bld.vop2_e64(aco_opcode::v_add_u32, dst, src0, src1);
-      add.instr->vop3().clamp = 1;
-   } else {
+   if (bld.program->chip_class < GFX8) {
       Builder::Result add = bld.vadd32(bld.def(v1), src0, src1, true);
-      bld.vop2_e64(aco_opcode::v_cndmask_b32, dst, add.def(0).getTemp(), Operand((uint32_t) -1), add.def(1).getTemp());
+      return bld.vop2_e64(aco_opcode::v_cndmask_b32, dst, add.def(0).getTemp(), Operand((uint32_t) -1), add.def(1).getTemp());
    }
+   Builder::Result add(NULL);
+   if (bld.program->chip_class >= GFX9) {
+      add = bld.vop2_e64(aco_opcode::v_add_u32, dst, src0, src1);
+   } else {
+      add = bld.vop2_e64(aco_opcode::v_add_co_u32, dst, bld.hint_vcc(bld.def(bld.lm)), src0, src1);
+   }
+   add.instr->vop3().clamp = 1;
    return dst.getTemp();
 }