aco: create v_mac_legacy_f32/v_fmac_legacy_f32

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13436>
This commit is contained in:
Rhys Perry 2021-04-27 12:11:37 +01:00 committed by Marge Bot
parent 43e32ad074
commit f68797ead7
4 changed files with 23 additions and 1 deletion

View File

@ -148,6 +148,7 @@ init_program(Program* program, Stage stage, const struct radv_shader_info* info,
if (program->family == CHIP_TAHITI || program->family == CHIP_CARRIZO ||
program->family == CHIP_HAWAII)
program->dev.has_fast_fma32 = true;
program->dev.has_mac_legacy32 = program->chip_class <= GFX7 || program->chip_class >= GFX10;
program->wgp_mode = wgp_mode;

View File

@ -2047,6 +2047,7 @@ struct DeviceInfo {
unsigned max_wave64_per_simd;
unsigned simd_per_cu;
bool has_fast_fma32 = false;
bool has_mac_legacy32 = false;
bool xnack_enabled = false;
bool sram_ecc_enabled = false;
};

View File

@ -674,7 +674,8 @@ VOP2 = {
(0x03, 0x03, 0x01, 0x01, 0x03, "v_add_f32", True),
(0x04, 0x04, 0x02, 0x02, 0x04, "v_sub_f32", True),
(0x05, 0x05, 0x03, 0x03, 0x05, "v_subrev_f32", True),
(0x06, 0x06, -1, -1, 0x06, "v_mac_legacy_f32", True),
(0x06, 0x06, -1, -1, 0x06, "v_mac_legacy_f32", True), #GFX6,7,10
( -1, -1, -1, -1, 0x06, "v_fmac_legacy_f32", True), #GFX10.3+
(0x07, 0x07, 0x04, 0x04, 0x07, "v_mul_legacy_f32", True),
(0x08, 0x08, 0x05, 0x05, 0x08, "v_mul_f32", True),
(0x09, 0x09, 0x06, 0x06, 0x09, "v_mul_i32_i24", False),
@ -1686,6 +1687,9 @@ for ver in ['gfx9', 'gfx10']:
# v_mad_legacy_f32 is replaced with v_fma_legacy_f32 on GFX10.3
if ver == 'gfx10' and names == set(['v_mad_legacy_f32', 'v_fma_legacy_f32']):
continue
# v_mac_legacy_f32 is replaced with v_fmac_legacy_f32 on GFX10.3
if ver == 'gfx10' and names == set(['v_mac_legacy_f32', 'v_fmac_legacy_f32']):
continue
print('%s and %s share the same opcode number (%s)' % (op_to_name[key], op.name, ver))
sys.exit(1)

View File

@ -2383,6 +2383,13 @@ get_affinities(ra_ctx& ctx, std::vector<IDSet>& live_out_per_block)
op = instr->operands[2];
break;
case aco_opcode::v_mad_legacy_f32:
case aco_opcode::v_fma_legacy_f32:
if (instr->usesModifiers() || !ctx.program->dev.has_mac_legacy32)
continue;
op = instr->operands[2];
break;
default: continue;
}
@ -2577,6 +2584,8 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
instr->opcode == aco_opcode::v_mad_legacy_f16 ||
(instr->opcode == aco_opcode::v_fma_f16 && program->chip_class >= GFX10) ||
(instr->opcode == aco_opcode::v_pk_fma_f16 && program->chip_class >= GFX10) ||
(instr->opcode == aco_opcode::v_mad_legacy_f32 && program->dev.has_mac_legacy32) ||
(instr->opcode == aco_opcode::v_fma_legacy_f32 && program->dev.has_mac_legacy32) ||
(instr->opcode == aco_opcode::v_dot4_i32_i8 && program->family != CHIP_VEGA20)) &&
instr->operands[2].isTemp() && instr->operands[2].isKillBeforeDef() &&
instr->operands[2].getTemp().type() == RegType::vgpr &&
@ -2608,6 +2617,12 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
case aco_opcode::v_fma_f16: instr->opcode = aco_opcode::v_fmac_f16; break;
case aco_opcode::v_pk_fma_f16: instr->opcode = aco_opcode::v_pk_fmac_f16; break;
case aco_opcode::v_dot4_i32_i8: instr->opcode = aco_opcode::v_dot4c_i32_i8; break;
case aco_opcode::v_mad_legacy_f32:
instr->opcode = aco_opcode::v_mac_legacy_f32;
break;
case aco_opcode::v_fma_legacy_f32:
instr->opcode = aco_opcode::v_fmac_legacy_f32;
break;
default: break;
}
}
@ -2617,6 +2632,7 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
if (instr->opcode == aco_opcode::v_interp_p2_f32 ||
instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_fmac_f32 ||
instr->opcode == aco_opcode::v_mac_f16 || instr->opcode == aco_opcode::v_fmac_f16 ||
instr->opcode == aco_opcode::v_fmac_legacy_f32 ||
instr->opcode == aco_opcode::v_pk_fmac_f16 ||
instr->opcode == aco_opcode::v_writelane_b32 ||
instr->opcode == aco_opcode::v_writelane_b32_e64 ||