aco: use a single instruction for uadd32_sat() on GFX8
fossil-db (GFX8):
Totals from 8 (0.01% of 147787) affected shaders:
SGPRs: 352 -> 368 (+4.55%)
CodeSize: 49576 -> 48788 (-1.59%)
Instrs: 9487 -> 9318 (-1.78%)
Latency: 49935 -> 49607 (-0.66%)
InvThroughput: 138493 -> 137443 (-0.76%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9598>
This commit is contained in:
parent
3decb52c82
commit
5bc100eb2d
|
@ -1230,13 +1230,18 @@ Temp emit_floor_f64(isel_context *ctx, Builder& bld, Definition dst, Temp val)
|
|||
|
||||
/* Emits a saturating unsigned 32-bit add of src0 and src1 into dst.
 *
 * - Before GFX8: a carry-producing add (bld.vadd32 with its last argument set
 *   to true) followed by a v_cndmask_b32 that picks 0xffffffff over the sum
 *   based on the add's second definition (presumably the carry-out).
 * - GFX8: a single VOP3-encoded v_add_co_u32 with the clamp bit set. It takes
 *   an extra definition of class bld.lm, hinted to VCC.
 * - GFX9 and later: a single VOP3-encoded v_add_u32 with the clamp bit set.
 *
 * Returns the temporary that holds the result.
 */
Temp uadd32_sat(Builder& bld, Definition dst, Temp src0, Temp src1)
{
   if (bld.program->chip_class < GFX8) {
      /* Two-instruction fallback: add, then select all-ones using the add's second definition. */
      Builder::Result add = bld.vadd32(bld.def(v1), src0, src1, true);
      return bld.vop2_e64(aco_opcode::v_cndmask_b32, dst, add.def(0).getTemp(), Operand((uint32_t) -1), add.def(1).getTemp());
   }

   Builder::Result add(NULL);
   if (bld.program->chip_class >= GFX9) {
      add = bld.vop2_e64(aco_opcode::v_add_u32, dst, src0, src1);
   } else {
      /* GFX8: this add opcode needs the additional lane-mask definition. */
      add = bld.vop2_e64(aco_opcode::v_add_co_u32, dst, bld.hint_vcc(bld.def(bld.lm)), src0, src1);
   }
   /* Set the clamp bit on the VOP3 instruction so the add saturates without a separate select. */
   add.instr->vop3().clamp = 1;
   return dst.getTemp();
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue