aco: create v_mac_legacy_f32/v_fmac_legacy_f32
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13436>
This commit is contained in:
parent
43e32ad074
commit
f68797ead7
|
@ -148,6 +148,7 @@ init_program(Program* program, Stage stage, const struct radv_shader_info* info,
|
||||||
if (program->family == CHIP_TAHITI || program->family == CHIP_CARRIZO ||
|
if (program->family == CHIP_TAHITI || program->family == CHIP_CARRIZO ||
|
||||||
program->family == CHIP_HAWAII)
|
program->family == CHIP_HAWAII)
|
||||||
program->dev.has_fast_fma32 = true;
|
program->dev.has_fast_fma32 = true;
|
||||||
|
program->dev.has_mac_legacy32 = program->chip_class <= GFX7 || program->chip_class >= GFX10;
|
||||||
|
|
||||||
program->wgp_mode = wgp_mode;
|
program->wgp_mode = wgp_mode;
|
||||||
|
|
||||||
|
|
|
@ -2047,6 +2047,7 @@ struct DeviceInfo {
|
||||||
unsigned max_wave64_per_simd;
|
unsigned max_wave64_per_simd;
|
||||||
unsigned simd_per_cu;
|
unsigned simd_per_cu;
|
||||||
bool has_fast_fma32 = false;
|
bool has_fast_fma32 = false;
|
||||||
|
bool has_mac_legacy32 = false;
|
||||||
bool xnack_enabled = false;
|
bool xnack_enabled = false;
|
||||||
bool sram_ecc_enabled = false;
|
bool sram_ecc_enabled = false;
|
||||||
};
|
};
|
||||||
|
|
|
@ -674,7 +674,8 @@ VOP2 = {
|
||||||
(0x03, 0x03, 0x01, 0x01, 0x03, "v_add_f32", True),
|
(0x03, 0x03, 0x01, 0x01, 0x03, "v_add_f32", True),
|
||||||
(0x04, 0x04, 0x02, 0x02, 0x04, "v_sub_f32", True),
|
(0x04, 0x04, 0x02, 0x02, 0x04, "v_sub_f32", True),
|
||||||
(0x05, 0x05, 0x03, 0x03, 0x05, "v_subrev_f32", True),
|
(0x05, 0x05, 0x03, 0x03, 0x05, "v_subrev_f32", True),
|
||||||
(0x06, 0x06, -1, -1, 0x06, "v_mac_legacy_f32", True),
|
(0x06, 0x06, -1, -1, 0x06, "v_mac_legacy_f32", True), #GFX6,7,10
|
||||||
|
( -1, -1, -1, -1, 0x06, "v_fmac_legacy_f32", True), #GFX10.3+
|
||||||
(0x07, 0x07, 0x04, 0x04, 0x07, "v_mul_legacy_f32", True),
|
(0x07, 0x07, 0x04, 0x04, 0x07, "v_mul_legacy_f32", True),
|
||||||
(0x08, 0x08, 0x05, 0x05, 0x08, "v_mul_f32", True),
|
(0x08, 0x08, 0x05, 0x05, 0x08, "v_mul_f32", True),
|
||||||
(0x09, 0x09, 0x06, 0x06, 0x09, "v_mul_i32_i24", False),
|
(0x09, 0x09, 0x06, 0x06, 0x09, "v_mul_i32_i24", False),
|
||||||
|
@ -1686,6 +1687,9 @@ for ver in ['gfx9', 'gfx10']:
|
||||||
# v_mad_legacy_f32 is replaced with v_fma_legacy_f32 on GFX10.3
|
# v_mad_legacy_f32 is replaced with v_fma_legacy_f32 on GFX10.3
|
||||||
if ver == 'gfx10' and names == set(['v_mad_legacy_f32', 'v_fma_legacy_f32']):
|
if ver == 'gfx10' and names == set(['v_mad_legacy_f32', 'v_fma_legacy_f32']):
|
||||||
continue
|
continue
|
||||||
|
# v_mac_legacy_f32 is replaced with v_fmac_legacy_f32 on GFX10.3
|
||||||
|
if ver == 'gfx10' and names == set(['v_mac_legacy_f32', 'v_fmac_legacy_f32']):
|
||||||
|
continue
|
||||||
|
|
||||||
print('%s and %s share the same opcode number (%s)' % (op_to_name[key], op.name, ver))
|
print('%s and %s share the same opcode number (%s)' % (op_to_name[key], op.name, ver))
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
|
@ -2383,6 +2383,13 @@ get_affinities(ra_ctx& ctx, std::vector<IDSet>& live_out_per_block)
|
||||||
op = instr->operands[2];
|
op = instr->operands[2];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case aco_opcode::v_mad_legacy_f32:
|
||||||
|
case aco_opcode::v_fma_legacy_f32:
|
||||||
|
if (instr->usesModifiers() || !ctx.program->dev.has_mac_legacy32)
|
||||||
|
continue;
|
||||||
|
op = instr->operands[2];
|
||||||
|
break;
|
||||||
|
|
||||||
default: continue;
|
default: continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2577,6 +2584,8 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
|
||||||
instr->opcode == aco_opcode::v_mad_legacy_f16 ||
|
instr->opcode == aco_opcode::v_mad_legacy_f16 ||
|
||||||
(instr->opcode == aco_opcode::v_fma_f16 && program->chip_class >= GFX10) ||
|
(instr->opcode == aco_opcode::v_fma_f16 && program->chip_class >= GFX10) ||
|
||||||
(instr->opcode == aco_opcode::v_pk_fma_f16 && program->chip_class >= GFX10) ||
|
(instr->opcode == aco_opcode::v_pk_fma_f16 && program->chip_class >= GFX10) ||
|
||||||
|
(instr->opcode == aco_opcode::v_mad_legacy_f32 && program->dev.has_mac_legacy32) ||
|
||||||
|
(instr->opcode == aco_opcode::v_fma_legacy_f32 && program->dev.has_mac_legacy32) ||
|
||||||
(instr->opcode == aco_opcode::v_dot4_i32_i8 && program->family != CHIP_VEGA20)) &&
|
(instr->opcode == aco_opcode::v_dot4_i32_i8 && program->family != CHIP_VEGA20)) &&
|
||||||
instr->operands[2].isTemp() && instr->operands[2].isKillBeforeDef() &&
|
instr->operands[2].isTemp() && instr->operands[2].isKillBeforeDef() &&
|
||||||
instr->operands[2].getTemp().type() == RegType::vgpr &&
|
instr->operands[2].getTemp().type() == RegType::vgpr &&
|
||||||
|
@ -2608,6 +2617,12 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
|
||||||
case aco_opcode::v_fma_f16: instr->opcode = aco_opcode::v_fmac_f16; break;
|
case aco_opcode::v_fma_f16: instr->opcode = aco_opcode::v_fmac_f16; break;
|
||||||
case aco_opcode::v_pk_fma_f16: instr->opcode = aco_opcode::v_pk_fmac_f16; break;
|
case aco_opcode::v_pk_fma_f16: instr->opcode = aco_opcode::v_pk_fmac_f16; break;
|
||||||
case aco_opcode::v_dot4_i32_i8: instr->opcode = aco_opcode::v_dot4c_i32_i8; break;
|
case aco_opcode::v_dot4_i32_i8: instr->opcode = aco_opcode::v_dot4c_i32_i8; break;
|
||||||
|
case aco_opcode::v_mad_legacy_f32:
|
||||||
|
instr->opcode = aco_opcode::v_mac_legacy_f32;
|
||||||
|
break;
|
||||||
|
case aco_opcode::v_fma_legacy_f32:
|
||||||
|
instr->opcode = aco_opcode::v_fmac_legacy_f32;
|
||||||
|
break;
|
||||||
default: break;
|
default: break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2617,6 +2632,7 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
|
||||||
if (instr->opcode == aco_opcode::v_interp_p2_f32 ||
|
if (instr->opcode == aco_opcode::v_interp_p2_f32 ||
|
||||||
instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_fmac_f32 ||
|
instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_fmac_f32 ||
|
||||||
instr->opcode == aco_opcode::v_mac_f16 || instr->opcode == aco_opcode::v_fmac_f16 ||
|
instr->opcode == aco_opcode::v_mac_f16 || instr->opcode == aco_opcode::v_fmac_f16 ||
|
||||||
|
instr->opcode == aco_opcode::v_fmac_legacy_f32 ||
|
||||||
instr->opcode == aco_opcode::v_pk_fmac_f16 ||
|
instr->opcode == aco_opcode::v_pk_fmac_f16 ||
|
||||||
instr->opcode == aco_opcode::v_writelane_b32 ||
|
instr->opcode == aco_opcode::v_writelane_b32 ||
|
||||||
instr->opcode == aco_opcode::v_writelane_b32_e64 ||
|
instr->opcode == aco_opcode::v_writelane_b32_e64 ||
|
||||||
|
|
Loading…
Reference in New Issue