ac: use fma on gfx10
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
This commit is contained in:
parent
d979e5bfab
commit
d64593e3c4
|
@@ -2665,6 +2665,13 @@ LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
|
||||||
LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
|
LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
|
||||||
LLVMValueRef s1, LLVMValueRef s2)
|
LLVMValueRef s1, LLVMValueRef s2)
|
||||||
{
|
{
|
||||||
|
/* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
|
||||||
|
if (ctx->chip_class >= GFX10) {
|
||||||
|
return ac_build_intrinsic(ctx, "llvm.fma.f32", ctx->f32,
|
||||||
|
(LLVMValueRef []) {s0, s1, s2}, 3,
|
||||||
|
AC_FUNC_ATTR_READNONE);
|
||||||
|
}
|
||||||
|
|
||||||
return LLVMBuildFAdd(ctx->builder,
|
return LLVMBuildFAdd(ctx->builder,
|
||||||
LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, "");
|
LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, "");
|
||||||
}
|
}
|
||||||
|
|
|
@@ -811,7 +811,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case nir_op_ffma:
|
case nir_op_ffma:
|
||||||
result = emit_intrin_3f_param(&ctx->ac, "llvm.fmuladd",
|
/* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
|
||||||
|
result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd",
|
||||||
ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
|
ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
|
||||||
break;
|
break;
|
||||||
case nir_op_ldexp:
|
case nir_op_ldexp:
|
||||||
|
|
Loading…
Reference in New Issue