From 5c038b3f02921dc0ea5348db0d8b417739a456f7 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Thu, 2 Dec 2021 14:33:17 +0000
Subject: [PATCH] nir: add _amd global access intrinsics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These are the same as the normal ones, but they take an unsigned 32-bit
offset in BASE and another unsigned 32-bit offset in the last source.

Signed-off-by: Rhys Perry
Reviewed-by: Timur Kristóf
Part-of:
---
 src/compiler/nir/nir_divergence_analysis.c | 15 +++++++++++++++
 src/compiler/nir/nir_gather_info.c         |  1 +
 src/compiler/nir/nir_intrinsics.py         | 11 +++++++++--
 src/compiler/nir/nir_opt_uniform_atomics.c | 21 +++++++++++++++++----
 4 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c
index 8d45baaa03b..2d03a9d787e 100644
--- a/src/compiler/nir/nir_divergence_analysis.c
+++ b/src/compiler/nir/nir_divergence_analysis.c
@@ -333,6 +333,7 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
    case nir_intrinsic_load_shared:
    case nir_intrinsic_load_global:
    case nir_intrinsic_load_global_constant:
+   case nir_intrinsic_load_global_amd:
    case nir_intrinsic_load_uniform:
    case nir_intrinsic_load_constant:
    case nir_intrinsic_load_sample_pos_from_id:
@@ -509,6 +510,20 @@
    case nir_intrinsic_global_atomic_fmin:
    case nir_intrinsic_global_atomic_fmax:
    case nir_intrinsic_global_atomic_fcomp_swap:
+   case nir_intrinsic_global_atomic_add_amd:
+   case nir_intrinsic_global_atomic_imin_amd:
+   case nir_intrinsic_global_atomic_umin_amd:
+   case nir_intrinsic_global_atomic_imax_amd:
+   case nir_intrinsic_global_atomic_umax_amd:
+   case nir_intrinsic_global_atomic_and_amd:
+   case nir_intrinsic_global_atomic_or_amd:
+   case nir_intrinsic_global_atomic_xor_amd:
+   case nir_intrinsic_global_atomic_exchange_amd:
+   case nir_intrinsic_global_atomic_comp_swap_amd:
+   case nir_intrinsic_global_atomic_fadd_amd:
+   case nir_intrinsic_global_atomic_fmin_amd:
+   case nir_intrinsic_global_atomic_fmax_amd:
+   case nir_intrinsic_global_atomic_fcomp_swap_amd:
    case nir_intrinsic_atomic_counter_add:
    case nir_intrinsic_atomic_counter_min:
    case nir_intrinsic_atomic_counter_max:
diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c
index 847aa1e697b..68d196ac06e 100644
--- a/src/compiler/nir/nir_gather_info.c
+++ b/src/compiler/nir/nir_gather_info.c
@@ -409,6 +409,7 @@
    case nir_intrinsic_ssbo_atomic_xor_ir3:
    case nir_intrinsic_store_global:
    case nir_intrinsic_store_global_ir3:
+   case nir_intrinsic_store_global_amd:
    case nir_intrinsic_store_ssbo:
    case nir_intrinsic_store_ssbo_ir3:
       return true;
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 1db325caabf..74b77574774 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -705,7 +705,8 @@ def memory_atomic_data1(name):
     intrinsic("deref_atomic_" + name, src_comp=[-1, 1], dest_comp=1, indices=[ACCESS])
     intrinsic("ssbo_atomic_" + name, src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS])
     intrinsic("shared_atomic_" + name, src_comp=[1, 1], dest_comp=1, indices=[BASE])
-    intrinsic("global_atomic_" + name, src_comp=[1, 1], dest_comp=1, indices=[BASE])
+    intrinsic("global_atomic_" + name, src_comp=[1, 1], dest_comp=1, indices=[])
+    intrinsic("global_atomic_" + name + "_amd", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
     if not name.startswith('f'):
         intrinsic("global_atomic_" + name + "_ir3", src_comp=[2, 1], dest_comp=1, indices=[BASE])
 
@@ -713,7 +714,8 @@ def memory_atomic_data2(name):
     intrinsic("deref_atomic_" + name, src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS])
     intrinsic("ssbo_atomic_" + name, src_comp=[-1, 1, 1, 1], dest_comp=1, indices=[ACCESS])
     intrinsic("shared_atomic_" + name, src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
-    intrinsic("global_atomic_" + name, src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
+    intrinsic("global_atomic_" + name, src_comp=[1, 1, 1], dest_comp=1, indices=[])
+    intrinsic("global_atomic_" + name + "_amd", src_comp=[1, 1, 1, 1], dest_comp=1, indices=[BASE])
     if not name.startswith('f'):
         intrinsic("global_atomic_" + name + "_ir3", src_comp=[2, 1, 1], dest_comp=1, indices=[BASE])
 
@@ -1243,6 +1245,11 @@
 intrinsic("load_buffer_amd", src_comp=[4, 1, 1], dest_comp=0, indices=[BASE, IS_SWIZZLED, SLC_AMD, MEMORY_MODES], flags=[CAN_ELIMINATE])
 # src[] = { store value, descriptor, base address, scalar offset }
 intrinsic("store_buffer_amd", src_comp=[0, 4, 1, 1], indices=[BASE, WRITE_MASK, IS_SWIZZLED, SLC_AMD, MEMORY_MODES])
 
+# src[] = { address, unsigned 32-bit offset }.
+load("global_amd", [1, 1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
+# src[] = { value, address, unsigned 32-bit offset }.
+store("global_amd", [1, 1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET, WRITE_MASK])
+
 # Same as shared_atomic_add, but with GDS. src[] = {store_val, gds_addr, m0}
 intrinsic("gds_atomic_add_amd", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
diff --git a/src/compiler/nir/nir_opt_uniform_atomics.c b/src/compiler/nir/nir_opt_uniform_atomics.c
index 6746a94fead..e4564b0653f 100644
--- a/src/compiler/nir/nir_opt_uniform_atomics.c
+++ b/src/compiler/nir/nir_opt_uniform_atomics.c
@@ -40,19 +40,27 @@
 #include "nir/nir_builder.h"
 
 static nir_op
-parse_atomic_op(nir_intrinsic_op op, unsigned *offset_src, unsigned *data_src)
+parse_atomic_op(nir_intrinsic_op op, unsigned *offset_src, unsigned *data_src,
+                unsigned *offset2_src)
 {
    switch (op) {
 #define OP_NOIMG(intrin, alu) \
    case nir_intrinsic_ssbo_atomic_##intrin: \
       *offset_src = 1; \
       *data_src = 2; \
+      *offset2_src = *offset_src; \
       return nir_op_##alu; \
    case nir_intrinsic_shared_atomic_##intrin: \
    case nir_intrinsic_global_atomic_##intrin: \
    case nir_intrinsic_deref_atomic_##intrin: \
       *offset_src = 0; \
       *data_src = 1; \
+      *offset2_src = *offset_src; \
+      return nir_op_##alu; \
+   case nir_intrinsic_global_atomic_##intrin##_amd: \
+      *offset_src = 0; \
+      *data_src = 1; \
+      *offset2_src = 2; \
       return nir_op_##alu;
 #define OP(intrin, alu) \
    OP_NOIMG(intrin, alu) \
@@ -61,6 +69,7 @@ parse_atomic_op(nir_intrinsic_op op, unsigned *offset_src, unsigned *data_src)
    case nir_intrinsic_bindless_image_atomic_##intrin: \
       *offset_src = 1; \
       *data_src = 3; \
+      *offset2_src = *offset_src; \
       return nir_op_##alu;
    OP(add, iadd)
    OP(imin, imin)
@@ -201,7 +210,8 @@ optimize_atomic(nir_builder *b, nir_intrinsic_instr *intrin, bool return_prev)
 {
    unsigned offset_src = 0;
    unsigned data_src = 0;
-   nir_op op = parse_atomic_op(intrin->intrinsic, &offset_src, &data_src);
+   unsigned offset2_src = 0;
+   nir_op op = parse_atomic_op(intrin->intrinsic, &offset_src, &data_src, &offset2_src);
    nir_ssa_def *data = intrin->src[data_src].ssa;
 
    /* Separate uniform reduction and scan is faster than doing a combined scan+reduce */
@@ -285,12 +295,15 @@ opt_uniform_atomics(nir_function_impl *impl)
             continue;
 
          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-         unsigned offset_src, data_src;
-         if (parse_atomic_op(intrin->intrinsic, &offset_src, &data_src) == nir_num_opcodes)
+         unsigned offset_src, data_src, offset2_src;
+         if (parse_atomic_op(intrin->intrinsic, &offset_src, &data_src, &offset2_src) ==
+             nir_num_opcodes)
             continue;
 
          if (nir_src_is_divergent(intrin->src[offset_src]))
            continue;
+         if (nir_src_is_divergent(intrin->src[offset2_src]))
+            continue;
 
          if (is_atomic_already_optimized(b.shader, intrin))
            continue;
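
Below is a minimal sketch of how a backend or lowering pass might emit the new
load_global_amd intrinsic with nir_builder. The helper name and the access and
alignment values are illustrative assumptions and not part of this patch; only
the source/index layout (64-bit address, unsigned 32-bit offset in the last
source, unsigned 32-bit offset in BASE) comes from the definitions above.

#include "nir/nir_builder.h"

/* Hypothetical helper, not part of this patch: emit a load_global_amd that
 * reads num_components x bit_size bits from addr + BASE + offset. */
static nir_ssa_def *
emit_load_global_amd(nir_builder *b, unsigned num_components, unsigned bit_size,
                     nir_ssa_def *addr, nir_ssa_def *offset, uint32_t const_base)
{
   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_global_amd);
   load->num_components = num_components;
   load->src[0] = nir_src_for_ssa(addr);   /* 64-bit base address */
   load->src[1] = nir_src_for_ssa(offset); /* unsigned 32-bit offset (last source) */
   nir_intrinsic_set_base(load, const_base);       /* unsigned 32-bit offset in BASE */
   nir_intrinsic_set_access(load, (enum gl_access_qualifier)0); /* no special access flags */
   nir_intrinsic_set_align(load, bit_size / 8, 0); /* illustrative alignment */
   nir_ssa_dest_init(&load->instr, &load->dest, num_components, bit_size, NULL);
   nir_builder_instr_insert(b, &load->instr);
   return &load->dest.ssa;
}

A store_global_amd would be built the same way, with the value in src[0], the
address in src[1], the offset in src[2], and the WRITE_MASK index set via
nir_intrinsic_set_write_mask().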
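
The addressing implied by the commit message (both the BASE index and the last
source are unsigned 32-bit offsets added to the 64-bit address) can also be
expressed as a lowering back to the plain intrinsic. This is only an
equivalence sketch, assuming the auto-generated nir_builder helpers
(nir_build_load_global, nir_u2u64, nir_imm_int64) are available; it is not part
of the patch and no driver is required to lower the intrinsic this way.

#include "nir/nir_builder.h"

/* Hypothetical equivalence sketch: rewrite a load_global_amd into a plain
 * load_global by folding both unsigned 32-bit offsets into the address. */
static nir_ssa_def *
lower_load_global_amd(nir_builder *b, nir_intrinsic_instr *intrin)
{
   nir_ssa_def *addr = intrin->src[0].ssa;                 /* 64-bit address */
   nir_ssa_def *offset = nir_u2u64(b, intrin->src[1].ssa); /* last source, zero-extended */
   nir_ssa_def *base =
      nir_imm_int64(b, (uint32_t)nir_intrinsic_base(intrin)); /* BASE, also unsigned */
   nir_ssa_def *full_addr = nir_iadd(b, addr, nir_iadd(b, base, offset));

   return nir_build_load_global(b, intrin->dest.ssa.num_components,
                                intrin->dest.ssa.bit_size, full_addr,
                                .access = nir_intrinsic_access(intrin),
                                .align_mul = nir_intrinsic_align_mul(intrin),
                                .align_offset = nir_intrinsic_align_offset(intrin));
}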