From c1f750eed9eecf867a2f592cef76106531021a2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Fri, 5 Apr 2024 21:51:04 -0400
Subject: [PATCH] nir: add nir_intrinsic_optimization_barrier_sgpr_amd for radeonsi

Reviewed-by: Georg Lehmann
Part-of:
---
 src/amd/llvm/ac_nir_to_llvm.c              | 4 ++++
 src/compiler/nir/nir_divergence_analysis.c | 1 +
 src/compiler/nir/nir_intrinsics.py         | 5 ++++-
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 1a80b2b38d821..df477c3ad1e91 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -3221,6 +3221,10 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
       result = get_src(ctx, instr->src[0]);
       ac_build_optimization_barrier(&ctx->ac, &result, false);
       break;
+   case nir_intrinsic_optimization_barrier_sgpr_amd:
+      result = get_src(ctx, instr->src[0]);
+      ac_build_optimization_barrier(&ctx->ac, &result, true);
+      break;
    case nir_intrinsic_shared_atomic:
    case nir_intrinsic_shared_atomic_swap: {
       LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], 0);
diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c
index 7ed1da6d29651..f1afaf8bdfcc1 100644
--- a/src/compiler/nir/nir_divergence_analysis.c
+++ b/src/compiler/nir/nir_divergence_analysis.c
@@ -263,6 +263,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
    case nir_intrinsic_load_tess_param_base_ir3:
    case nir_intrinsic_load_primitive_location_ir3:
    case nir_intrinsic_preamble_start_ir3:
+   case nir_intrinsic_optimization_barrier_sgpr_amd:
       is_divergent = false;
       break;
 
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index fd8804b29cf9c..2ea2e740a2fbb 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -1453,9 +1453,12 @@ store("tf_r600", [])
 # This barrier is a hint that prevents moving the instruction that computes
 # src after this barrier. It's a constraint for the instruction scheduler.
 # Otherwise it's identical to a move instruction.
-# On AMD, it also forces the src value to be stored in a VGPR.
+# The VGPR version forces the src value to be stored in a VGPR, while the SGPR
+# version enforces an SGPR.
 intrinsic("optimization_barrier_vgpr_amd", dest_comp=0, src_comp=[0],
           flags=[CAN_ELIMINATE])
+intrinsic("optimization_barrier_sgpr_amd", dest_comp=0, src_comp=[0],
+          flags=[CAN_ELIMINATE])
 
 # These are no-op intrinsics used as a simple source and user of SSA defs for testing.
 intrinsic("unit_test_amd", src_comp=[0], indices=[BASE])