aco: disallow various v_add_u32 opts if modifiers are used

Check for clamp, SDWA or DPP. The optimization isn't possible with SDWA
and DPP, so it would have been skipped anyway. Doing any of these with a
clamp modifier present would be incorrect.

No fossil-db changes.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7045>
This commit is contained in:
Rhys Perry 2020-10-07 11:45:30 +01:00 committed by Marge Bot
parent 91ffeed88a
commit 966732e8ca
2 changed files with 29 additions and 1 deletions

View File

@ -2907,7 +2907,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
} else if (instr->opcode == aco_opcode::v_add_u32) {
if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) ;
else if (combine_add_bcnt(ctx, instr)) ;
else if (ctx.program->chip_class >= GFX9) {
else if (ctx.program->chip_class >= GFX9 && !instr->usesModifiers()) {
if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ;
else if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ;
else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_i32, aco_opcode::v_add3_u32, "012", 1 | 2)) ;

View File

@ -365,3 +365,31 @@ BEGIN_TEST(optimize.const_comparison_ordering)
finish_opt_test();
END_TEST
BEGIN_TEST(optimize.add3)
   /* Exercises folding of two chained v_add_u32 into a single v_add3_u32 on
    * GFX9, and checks that the chain is left untouched when either add
    * carries a clamp modifier.
    */
   //>> v1: %a, v1: %b, v1: %c, s2: %_:exec = p_startpgm
   if (!setup_cs("v1 v1 v1", GFX9))
      return;

   /* Case 0: neither add has modifiers; a single v_add3_u32 is expected. */
   //! v1: %res0 = v_add3_u32 %a, %b, %c
   //! p_unit_test 0, %res0
   Builder::Result plain_inner = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
   Builder::Result plain_outer = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], plain_inner);
   writeout(0, plain_outer);

   /* Case 1: clamp on the inner add; both v_add_u32 are expected to remain. */
   //! v1: %tmp1 = v_add_u32 %b, %c clamp
   //! v1: %res1 = v_add_u32 %a, %tmp1
   //! p_unit_test 1, %res1
   Builder::Result clamped_inner = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
   static_cast<VOP3A_instruction *>(clamped_inner.instr)->clamp = true;
   Builder::Result outer_of_clamped = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], clamped_inner);
   writeout(1, outer_of_clamped);

   /* Case 2: clamp on the outer add; both v_add_u32 are expected to remain. */
   //! v1: %tmp2 = v_add_u32 %b, %c
   //! v1: %res2 = v_add_u32 %a, %tmp2 clamp
   //! p_unit_test 2, %res2
   Builder::Result inner_of_clamped = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
   Builder::Result clamped_outer = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], inner_of_clamped);
   static_cast<VOP3A_instruction *>(clamped_outer.instr)->clamp = true;
   writeout(2, clamped_outer);

   finish_opt_test();
END_TEST