From be16ebc5cafb2864228bf4ad29935cf0187e7f4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 30 Jun 2021 19:20:49 +0200 Subject: [PATCH] aco/optimizer: fuse v_mul_f64 + v_add_f64 -> v_fma_f64 No fossil-db changes. Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_optimizer.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index f739257afaf..8874673f823 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -1422,6 +1422,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr) if (!ctx.program->needs_wqm) ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->chip_class, 0u); break; + case aco_opcode::v_mul_f64: ctx.info[instr->definitions[0].tempId()].set_mul(instr.get()); break; case aco_opcode::v_mul_f16: case aco_opcode::v_mul_f32: { /* omod */ ctx.info[instr->definitions[0].tempId()].set_mul(instr.get()); @@ -3238,9 +3239,11 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr) instr->opcode == aco_opcode::v_subrev_f32; bool mad16 = instr->opcode == aco_opcode::v_add_f16 || instr->opcode == aco_opcode::v_sub_f16 || instr->opcode == aco_opcode::v_subrev_f16; - if (mad16 || mad32) { - bool need_fma = mad32 ? (ctx.fp_mode.denorm32 != 0 || ctx.program->chip_class >= GFX10_3) - : (ctx.fp_mode.denorm16_64 != 0 || ctx.program->chip_class >= GFX10); + bool mad64 = instr->opcode == aco_opcode::v_add_f64; + if (mad16 || mad32 || mad64) { + bool need_fma = + mad32 ? (ctx.fp_mode.denorm32 != 0 || ctx.program->chip_class >= GFX10_3) + : (ctx.fp_mode.denorm16_64 != 0 || ctx.program->chip_class >= GFX10 || mad64); if (need_fma && instr->definitions[0].isPrecise()) return; if (need_fma && mad32 && !ctx.program->dev.has_fast_fma32) @@ -3325,6 +3328,8 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr) : aco_opcode::v_fma_f16) : (ctx.program->chip_class == GFX8 ? 
aco_opcode::v_mad_legacy_f16 : aco_opcode::v_mad_f16); + if (mad64) + mad_op = aco_opcode::v_fma_f64; aco_ptr<VOP3_instruction> mad{ create_instruction<VOP3_instruction>(mad_op, Format::VOP3, 3, 1)}; @@ -3591,7 +3596,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr) mad_info = NULL; } /* check literals */ - else if (!instr->usesModifiers()) { + else if (!instr->usesModifiers() && instr->opcode != aco_opcode::v_fma_f64) { /* FMA can only take literals on GFX10+ */ if ((instr->opcode == aco_opcode::v_fma_f32 || instr->opcode == aco_opcode::v_fma_f16) && ctx.program->chip_class < GFX10)