aco: disallow various v_add_u32 opts if modifiers are used
Check for clamp, SDWA or DPP. The optimization isn't possible with SDWA and DPP, so it would have been skipped anyway. Doing any of these with a clamp modifier present would be incorrect. No fossil-db changes. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Cc: mesa-stable Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7045>
This commit is contained in:
parent
91ffeed88a
commit
966732e8ca
|
@ -2907,7 +2907,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
|
|||
} else if (instr->opcode == aco_opcode::v_add_u32) {
|
||||
if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) ;
|
||||
else if (combine_add_bcnt(ctx, instr)) ;
|
||||
else if (ctx.program->chip_class >= GFX9) {
|
||||
else if (ctx.program->chip_class >= GFX9 && !instr->usesModifiers()) {
|
||||
if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ;
|
||||
else if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ;
|
||||
else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_i32, aco_opcode::v_add3_u32, "012", 1 | 2)) ;
|
||||
|
|
|
@ -365,3 +365,31 @@ BEGIN_TEST(optimize.const_comparison_ordering)
|
|||
|
||||
finish_opt_test();
|
||||
END_TEST
|
||||
|
||||
// Optimizer test for combining two chained v_add_u32 instructions on GFX9.
// Three cases are written out: no modifiers, clamp on the inner add, and
// clamp on the outer add. Per the accompanying commit message, combining
// when a clamp modifier is present would be incorrect.
// NOTE(review): the `//>>` and `//!` lines look like expected-output patterns
// consumed by the test framework -- confirm before editing or reordering them.
BEGIN_TEST(optimize.add3)
|
||||
//>> v1: %a, v1: %b, v1: %c, s2: %_:exec = p_startpgm
|
||||
if (!setup_cs("v1 v1 v1", GFX9))
|
||||
return;
|
||||
|
||||
// Case 0: neither add carries a modifier; the pattern below expects a single
// v_add3_u32 of all three inputs.
//! v1: %res0 = v_add3_u32 %a, %b, %c
|
||||
//! p_unit_test 0, %res0
|
||||
Builder::Result tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
|
||||
writeout(0, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
|
||||
|
||||
// Case 1: clamp is set on the inner add (%b + %c); the pattern below expects
// both v_add_u32 instructions to remain, with the clamp kept on the inner one.
//! v1: %tmp1 = v_add_u32 %b, %c clamp
|
||||
//! v1: %res1 = v_add_u32 %a, %tmp1
|
||||
//! p_unit_test 1, %res1
|
||||
tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
|
||||
static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true;
|
||||
writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
|
||||
|
||||
// Case 2: clamp is set on the outer add (%a + tmp); the pattern below expects
// both v_add_u32 instructions to remain, with the clamp kept on the outer one.
//! v1: %tmp2 = v_add_u32 %b, %c
|
||||
//! v1: %res2 = v_add_u32 %a, %tmp2 clamp
|
||||
//! p_unit_test 2, %res2
|
||||
tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
|
||||
tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp);
|
||||
static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true;
|
||||
writeout(2, tmp);
|
||||
|
||||
finish_opt_test();
|
||||
END_TEST
|
||||
|
|
Loading…
Reference in New Issue