aco: consider legacy multiplications in optimizer
Optimize omod, -(a*b), b2f(a)*b, a*1, a*0 and create MAD/FMA.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13436>
This commit is contained in:
parent
e7f91b194a
commit
43e32ad074
|
@ -1603,7 +1603,8 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
break;
|
||||
case aco_opcode::v_mul_f64: ctx.info[instr->definitions[0].tempId()].set_mul(instr.get()); break;
|
||||
case aco_opcode::v_mul_f16:
|
||||
case aco_opcode::v_mul_f32: { /* omod */
|
||||
case aco_opcode::v_mul_f32:
|
||||
case aco_opcode::v_mul_legacy_f32: { /* omod */
|
||||
ctx.info[instr->definitions[0].tempId()].set_mul(instr.get());
|
||||
|
||||
/* TODO: try to move the negate/abs modifier to the consumer instead */
|
||||
|
@ -1645,8 +1646,9 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
(fp16 ? 0x3800 : 0x3f000000)) { /* 0.5 */
|
||||
ctx.info[instr->operands[i].tempId()].set_omod5(instr.get());
|
||||
} else if (instr->operands[!i].constantValue() == 0u &&
|
||||
!(fp16 ? ctx.fp_mode.preserve_signed_zero_inf_nan16_64
|
||||
: ctx.fp_mode.preserve_signed_zero_inf_nan32)) { /* 0.0 */
|
||||
(!(fp16 ? ctx.fp_mode.preserve_signed_zero_inf_nan16_64
|
||||
: ctx.fp_mode.preserve_signed_zero_inf_nan32) ||
|
||||
instr->opcode == aco_opcode::v_mul_legacy_f32)) { /* 0.0 */
|
||||
ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->chip_class, 0u);
|
||||
} else {
|
||||
continue;
|
||||
|
@ -3496,6 +3498,9 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
return;
|
||||
if (mul_instr->isSDWA() || mul_instr->isDPP())
|
||||
return;
|
||||
if (mul_instr->opcode == aco_opcode::v_mul_legacy_f32 &&
|
||||
ctx.fp_mode.preserve_signed_zero_inf_nan32)
|
||||
return;
|
||||
|
||||
/* convert to mul(neg(a), b) */
|
||||
ctx.uses[mul_instr->definitions[0].tempId()]--;
|
||||
|
@ -3554,6 +3559,10 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (info.instr->isVOP3() && (info.instr->vop3().clamp || info.instr->vop3().omod))
|
||||
continue;
|
||||
|
||||
bool legacy = info.instr->opcode == aco_opcode::v_mul_legacy_f32;
|
||||
if (legacy && need_fma && ctx.program->chip_class < GFX10_3)
|
||||
continue;
|
||||
|
||||
Operand op[3] = {info.instr->operands[0], info.instr->operands[1], instr->operands[1 - i]};
|
||||
if (info.instr->isSDWA() || info.instr->isDPP() || !check_vop3_operands(ctx, 3, op) ||
|
||||
ctx.uses[instr->operands[i].tempId()] > uses)
|
||||
|
@ -3619,13 +3628,17 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
neg[2 - add_op_idx] = neg[2 - add_op_idx] ^ true;
|
||||
|
||||
aco_opcode mad_op = need_fma ? aco_opcode::v_fma_f32 : aco_opcode::v_mad_f32;
|
||||
if (mad16)
|
||||
if (mul_instr->opcode == aco_opcode::v_mul_legacy_f32) {
|
||||
assert(need_fma == (ctx.program->chip_class >= GFX10_3));
|
||||
mad_op = need_fma ? aco_opcode::v_fma_legacy_f32 : aco_opcode::v_mad_legacy_f32;
|
||||
} else if (mad16) {
|
||||
mad_op = need_fma ? (ctx.program->chip_class == GFX8 ? aco_opcode::v_fma_legacy_f16
|
||||
: aco_opcode::v_fma_f16)
|
||||
: (ctx.program->chip_class == GFX8 ? aco_opcode::v_mad_legacy_f16
|
||||
: aco_opcode::v_mad_f16);
|
||||
if (mad64)
|
||||
} else if (mad64) {
|
||||
mad_op = aco_opcode::v_fma_f64;
|
||||
}
|
||||
|
||||
aco_ptr<VOP3_instruction> mad{
|
||||
create_instruction<VOP3_instruction>(mad_op, Format::VOP3, 3, 1)};
|
||||
|
@ -3646,7 +3659,9 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
}
|
||||
}
|
||||
/* v_mul_f32(v_cndmask_b32(0, 1.0, cond), a) -> v_cndmask_b32(0, a, cond) */
|
||||
else if (instr->opcode == aco_opcode::v_mul_f32 && !ctx.fp_mode.preserve_signed_zero_inf_nan32 &&
|
||||
else if (((instr->opcode == aco_opcode::v_mul_f32 &&
|
||||
!ctx.fp_mode.preserve_signed_zero_inf_nan32) ||
|
||||
instr->opcode == aco_opcode::v_mul_legacy_f32) &&
|
||||
!instr->usesModifiers() && !ctx.fp_mode.must_flush_denorms32) {
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
if (instr->operands[i].isTemp() && ctx.info[instr->operands[i].tempId()].is_b2f() &&
|
||||
|
@ -3904,7 +3919,9 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
mad_info = NULL;
|
||||
}
|
||||
/* check literals */
|
||||
else if (!instr->usesModifiers() && instr->opcode != aco_opcode::v_fma_f64) {
|
||||
else if (!instr->usesModifiers() && instr->opcode != aco_opcode::v_fma_f64 &&
|
||||
instr->opcode != aco_opcode::v_mad_legacy_f32 &&
|
||||
instr->opcode != aco_opcode::v_fma_legacy_f32) {
|
||||
/* FMA can only take literals on GFX10+ */
|
||||
if ((instr->opcode == aco_opcode::v_fma_f32 || instr->opcode == aco_opcode::v_fma_f16) &&
|
||||
ctx.program->chip_class < GFX10)
|
||||
|
|
Loading…
Reference in New Issue