From f68797ead72c29678a54aae457dbd9e9b7946588 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Tue, 27 Apr 2021 12:11:37 +0100 Subject: [PATCH] aco: create v_mac_legacy_f32/v_fmac_legacy_f32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rhys Perry Reviewed-by: Timur Kristóf Part-of: --- src/amd/compiler/aco_ir.cpp | 1 + src/amd/compiler/aco_ir.h | 1 + src/amd/compiler/aco_opcodes.py | 6 +++++- src/amd/compiler/aco_register_allocation.cpp | 16 ++++++++++++++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 523b25e619e..1b624e4b930 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -148,6 +148,7 @@ init_program(Program* program, Stage stage, const struct radv_shader_info* info, if (program->family == CHIP_TAHITI || program->family == CHIP_CARRIZO || program->family == CHIP_HAWAII) program->dev.has_fast_fma32 = true; + program->dev.has_mac_legacy32 = program->chip_class <= GFX7 || program->chip_class >= GFX10; program->wgp_mode = wgp_mode; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 39b2e3aa70f..4a44448d014 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -2047,6 +2047,7 @@ struct DeviceInfo { unsigned max_wave64_per_simd; unsigned simd_per_cu; bool has_fast_fma32 = false; + bool has_mac_legacy32 = false; bool xnack_enabled = false; bool sram_ecc_enabled = false; }; diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py index 16494a701c4..d95f37a21e7 100644 --- a/src/amd/compiler/aco_opcodes.py +++ b/src/amd/compiler/aco_opcodes.py @@ -674,7 +674,8 @@ VOP2 = { (0x03, 0x03, 0x01, 0x01, 0x03, "v_add_f32", True), (0x04, 0x04, 0x02, 0x02, 0x04, "v_sub_f32", True), (0x05, 0x05, 0x03, 0x03, 0x05, "v_subrev_f32", True), - (0x06, 0x06, -1, -1, 0x06, "v_mac_legacy_f32", True), + (0x06, 0x06, -1, -1, 0x06, "v_mac_legacy_f32", True), #GFX6,7,10 + ( -1, -1, -1, -1, 0x06, "v_fmac_legacy_f32", True), #GFX10.3+ (0x07, 0x07, 0x04, 0x04, 0x07, "v_mul_legacy_f32", True), (0x08, 0x08, 0x05, 0x05, 0x08, "v_mul_f32", True), (0x09, 0x09, 0x06, 0x06, 0x09, "v_mul_i32_i24", False), @@ -1686,6 +1687,9 @@ for ver in ['gfx9', 'gfx10']: # v_mad_legacy_f32 is replaced with v_fma_legacy_f32 on GFX10.3 if ver == 'gfx10' and names == set(['v_mad_legacy_f32', 'v_fma_legacy_f32']): continue + # v_mac_legacy_f32 is replaced with v_fmac_legacy_f32 on GFX10.3 + if ver == 'gfx10' and names == set(['v_mac_legacy_f32', 'v_fmac_legacy_f32']): + continue print('%s and %s share the same opcode number (%s)' % (op_to_name[key], op.name, ver)) sys.exit(1) diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 088afaa307c..ab10b2f3bcc 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2383,6 +2383,13 @@ get_affinities(ra_ctx& ctx, std::vector& live_out_per_block) op = instr->operands[2]; break; + case aco_opcode::v_mad_legacy_f32: + case aco_opcode::v_fma_legacy_f32: + if (instr->usesModifiers() || !ctx.program->dev.has_mac_legacy32) + continue; + op = instr->operands[2]; + break; + default: continue; } @@ -2577,6 +2584,8 @@ register_allocation(Program* program, std::vector& live_out_per_block, ra instr->opcode == aco_opcode::v_mad_legacy_f16 || (instr->opcode == aco_opcode::v_fma_f16 && program->chip_class >= GFX10) || (instr->opcode == aco_opcode::v_pk_fma_f16 && program->chip_class >= GFX10) || + (instr->opcode == aco_opcode::v_mad_legacy_f32 && program->dev.has_mac_legacy32) || + (instr->opcode == aco_opcode::v_fma_legacy_f32 && program->dev.has_mac_legacy32) || (instr->opcode == aco_opcode::v_dot4_i32_i8 && program->family != CHIP_VEGA20)) && instr->operands[2].isTemp() && instr->operands[2].isKillBeforeDef() && instr->operands[2].getTemp().type() == RegType::vgpr && @@ -2608,6 +2617,12 @@ register_allocation(Program* program, std::vector& live_out_per_block, ra case aco_opcode::v_fma_f16: instr->opcode = aco_opcode::v_fmac_f16; break; case aco_opcode::v_pk_fma_f16: instr->opcode = aco_opcode::v_pk_fmac_f16; break; case aco_opcode::v_dot4_i32_i8: instr->opcode = aco_opcode::v_dot4c_i32_i8; break; + case aco_opcode::v_mad_legacy_f32: + instr->opcode = aco_opcode::v_mac_legacy_f32; + break; + case aco_opcode::v_fma_legacy_f32: + instr->opcode = aco_opcode::v_fmac_legacy_f32; + break; default: break; } } @@ -2617,6 +2632,7 @@ register_allocation(Program* program, std::vector& live_out_per_block, ra if (instr->opcode == aco_opcode::v_interp_p2_f32 || instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_fmac_f32 || instr->opcode == aco_opcode::v_mac_f16 || instr->opcode == aco_opcode::v_fmac_f16 || + instr->opcode == aco_opcode::v_fmac_legacy_f32 || instr->opcode == aco_opcode::v_pk_fmac_f16 || instr->opcode == aco_opcode::v_writelane_b32 || instr->opcode == aco_opcode::v_writelane_b32_e64 ||