aco: use v_fma_mix to combine mul/add/fma output conversions

fossil-db (Sienna Cichlid):
Totals from 42 (0.03% of 134913) affected shaders:
CodeSize: 596904 -> 596332 (-0.10%); split: -0.10%, +0.00%
Instrs: 110194 -> 109902 (-0.26%)
Latency: 1205239 -> 1204915 (-0.03%); split: -0.03%, +0.00%
InvThroughput: 189697 -> 189375 (-0.17%)
VClause: 1365 -> 1366 (+0.07%)
Copies: 5429 -> 5414 (-0.28%); split: -0.33%, +0.06%
Branches: 4034 -> 4026 (-0.20%)

fossil-db (Navi):
Totals from 42 (0.03% of 134913) affected shaders:
CodeSize: 596044 -> 595488 (-0.09%); split: -0.10%, +0.00%
Instrs: 110845 -> 110540 (-0.28%)
Latency: 1206131 -> 1205747 (-0.03%)
InvThroughput: 190178 -> 189809 (-0.19%)
VClause: 1372 -> 1370 (-0.15%); split: -0.29%, +0.15%
Copies: 5671 -> 5641 (-0.53%); split: -0.56%, +0.04%
Branches: 4033 -> 4025 (-0.20%)

fossil-db (Vega):
Totals from 42 (0.03% of 135048) affected shaders:
CodeSize: 605824 -> 605352 (-0.08%); split: -0.08%, +0.00%
Instrs: 115975 -> 115706 (-0.23%)
Latency: 1399845 -> 1398912 (-0.07%)
InvThroughput: 489901 -> 489442 (-0.09%)
VClause: 1314 -> 1311 (-0.23%); split: -0.38%, +0.15%
Copies: 9673 -> 9666 (-0.07%); split: -0.12%, +0.05%
Branches: 4025 -> 4024 (-0.02%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14769>
This commit is contained in:
Rhys Perry 2022-01-17 16:52:10 +00:00 committed by Marge Bot
parent 21304b772c
commit 1092f37805
1 changed files with 48 additions and 4 deletions

View File

@ -123,6 +123,7 @@ enum Label {
label_dpp16 = 1ull << 35,
label_dpp8 = 1ull << 36,
label_f2f32 = 1ull << 37,
label_f2f16 = 1ull << 38,
};
static constexpr uint64_t instr_usedef_labels =
@ -130,7 +131,7 @@ static constexpr uint64_t instr_usedef_labels =
label_uniform_bitwise | label_minmax | label_vopc | label_usedef | label_extract | label_dpp16 |
label_dpp8 | label_f2f32;
static constexpr uint64_t instr_mod_labels =
label_omod2 | label_omod4 | label_omod5 | label_clamp | label_insert;
label_omod2 | label_omod4 | label_omod5 | label_clamp | label_insert | label_f2f16;
static constexpr uint64_t instr_labels = instr_usedef_labels | instr_mod_labels;
static constexpr uint64_t temp_labels = label_abs | label_neg | label_temp | label_vcc | label_b2f |
@ -326,6 +327,14 @@ struct ssa_info {
bool is_clamp() { return label & label_clamp; }
void set_f2f16(Instruction* conv)
{
add_label(label_f2f16);
instr = conv;
}
bool is_f2f16() { return label & label_f2f16; }
void set_undefined() { add_label(label_undefined); }
bool is_undefined() { return label & label_undefined; }
@ -1889,6 +1898,11 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get());
break;
}
case aco_opcode::v_cvt_f16_f32: {
if (instr->operands[0].isTemp())
ctx.info[instr->operands[0].tempId()].set_f2f16(instr.get());
break;
}
case aco_opcode::v_cvt_f32_f16: {
if (instr->operands[0].isTemp())
ctx.info[instr->definitions[0].tempId()].set_f2f32(instr.get());
@ -3060,7 +3074,7 @@ apply_omod_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr)
}
instr->definitions[0].swapTemp(def_info.instr->definitions[0]);
ctx.info[instr->definitions[0].tempId()].label &= label_clamp | label_insert;
ctx.info[instr->definitions[0].tempId()].label &= label_clamp | label_insert | label_f2f16;
ctx.uses[def_info.instr->definitions[0].tempId()]--;
return true;
@ -3499,11 +3513,41 @@ to_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
vop3p->clamp = instr->isVOP3() && instr->vop3().clamp;
instr = std::move(vop3p);
ctx.info[instr->definitions[0].tempId()].label &= label_clamp | label_mul;
ctx.info[instr->definitions[0].tempId()].label &= label_f2f16 | label_clamp | label_mul;
if (ctx.info[instr->definitions[0].tempId()].label & label_mul)
ctx.info[instr->definitions[0].tempId()].instr = instr.get();
}
bool
combine_output_conversion(opt_ctx& ctx, aco_ptr<Instruction>& instr)
{
ssa_info& def_info = ctx.info[instr->definitions[0].tempId()];
if (!def_info.is_f2f16())
return false;
Instruction* conv = def_info.instr;
if (!can_use_mad_mix(ctx, instr) || ctx.uses[instr->definitions[0].tempId()] != 1)
return false;
if (!ctx.uses[conv->definitions[0].tempId()])
return false;
if (conv->usesModifiers())
return false;
if (!instr->isVOP3P())
to_mad_mix(ctx, instr);
instr->opcode = aco_opcode::v_fma_mixlo_f16;
instr->definitions[0].swapTemp(conv->definitions[0]);
if (conv->definitions[0].isPrecise())
instr->definitions[0].setPrecise(true);
ctx.info[instr->definitions[0].tempId()].label &= label_clamp;
ctx.uses[conv->definitions[0].tempId()]--;
return true;
}
void
combine_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
{
@ -3603,7 +3647,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (can_apply_sgprs(ctx, instr))
apply_sgprs(ctx, instr);
combine_mad_mix(ctx, instr);
while (apply_omod_clamp(ctx, instr))
while (apply_omod_clamp(ctx, instr) | combine_output_conversion(ctx, instr))
;
apply_insert(ctx, instr);
}