ac: generate FMA for inexact instructions for radeonsi
NIR mostly does this already.

Totals:
SGPRS: 2588520 -> 2591784 (0.13 %)
VGPRS: 1666984 -> 1666888 (-0.01 %)
Spilled SGPRs: 4074 -> 4131 (1.40 %)
Spilled VGPRs: 38 -> 38 (0.00 %)
Private memory VGPRs: 2176 -> 2176 (0.00 %)
Scratch size: 2228 -> 2228 (0.00 %) dwords per thread
Code Size: 52726872 -> 52715468 (-0.02 %) bytes
LDS: 92 -> 92 (0.00 %) blocks
Max Waves: 479872 -> 479897 (0.01 %)
Wait states: 0 -> 0 (0.00 %)

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4696>
This commit is contained in:
parent
f2c2a28073
commit
4b9370cb0f
|
@ -96,6 +96,11 @@ LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
|
|||
*/
|
||||
flags.setAllowReciprocal(); /* arcp */
|
||||
|
||||
/* Allow floating-point contraction (e.g. fusing a multiply
|
||||
* followed by an addition into a fused multiply-and-add).
|
||||
*/
|
||||
flags.setAllowContract(); /* contract */
|
||||
|
||||
llvm::unwrap(builder)->setFastMathFlags(flags);
|
||||
break;
|
||||
}
|
||||
|
@ -103,6 +108,32 @@ LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
|
|||
return builder;
|
||||
}
|
||||
|
||||
/* Return the original state of inexact math. */
|
||||
bool ac_disable_inexact_math(LLVMBuilderRef builder)
|
||||
{
|
||||
auto *b = llvm::unwrap(builder);
|
||||
llvm::FastMathFlags flags = b->getFastMathFlags();
|
||||
|
||||
if (!flags.allowContract())
|
||||
return false;
|
||||
|
||||
flags.setAllowContract(false);
|
||||
b->setFastMathFlags(flags);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Set the "contract" fast-math flag on the given builder to `value`.
 *
 * `value` is the state to put back, e.g. the result returned earlier by
 * ac_disable_inexact_math(). If the flag already has that state, the builder
 * is not modified.
 */
void ac_restore_inexact_math(LLVMBuilderRef builder, bool value)
{
	auto *ir_builder = llvm::unwrap(builder);
	llvm::FastMathFlags fmf = ir_builder->getFastMathFlags();

	if (fmf.allowContract() != value) {
		fmf.setAllowContract(value);
		ir_builder->setFastMathFlags(fmf);
	}
}
|
||||
|
||||
LLVMTargetLibraryInfoRef
|
||||
ac_create_target_library_info(const char *triple)
|
||||
{
|
||||
|
|
|
@ -109,6 +109,8 @@ LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx);
|
|||
|
||||
LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
|
||||
enum ac_float_mode float_mode);
|
||||
/* Clear the builder's "contract" fast-math flag; returns whether it was set before the call. */
bool ac_disable_inexact_math(LLVMBuilderRef builder);
|
||||
/* Set the builder's "contract" fast-math flag to `value` (as returned by ac_disable_inexact_math). */
void ac_restore_inexact_math(LLVMBuilderRef builder, bool value);
|
||||
|
||||
void
|
||||
ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
|
||||
|
|
|
@ -589,6 +589,10 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
|||
unsigned num_components = instr->dest.dest.ssa.num_components;
|
||||
unsigned src_components;
|
||||
LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
|
||||
bool saved_inexact = false;
|
||||
|
||||
if (instr->exact)
|
||||
saved_inexact = ac_disable_inexact_math(ctx->ac.builder);
|
||||
|
||||
assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
|
||||
switch (instr->op) {
|
||||
|
@ -1182,6 +1186,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
|||
result = ac_to_integer_or_pointer(&ctx->ac, result);
|
||||
ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
|
||||
}
|
||||
|
||||
if (instr->exact)
|
||||
ac_restore_inexact_math(ctx->ac.builder, saved_inexact);
|
||||
}
|
||||
|
||||
static void visit_load_const(struct ac_nir_context *ctx,
|
||||
|
|
Loading…
Reference in New Issue