nir: add _amd global access intrinsics
These are the same as the normal ones, but they take an unsigned 32-bit offset in BASE and another unsigned 32-bit offset in the last source.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14124>
This commit is contained in:
parent
391bf3ea30
commit
5c038b3f02
|
@ -333,6 +333,7 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
|
||||||
case nir_intrinsic_load_shared:
|
case nir_intrinsic_load_shared:
|
||||||
case nir_intrinsic_load_global:
|
case nir_intrinsic_load_global:
|
||||||
case nir_intrinsic_load_global_constant:
|
case nir_intrinsic_load_global_constant:
|
||||||
|
case nir_intrinsic_load_global_amd:
|
||||||
case nir_intrinsic_load_uniform:
|
case nir_intrinsic_load_uniform:
|
||||||
case nir_intrinsic_load_constant:
|
case nir_intrinsic_load_constant:
|
||||||
case nir_intrinsic_load_sample_pos_from_id:
|
case nir_intrinsic_load_sample_pos_from_id:
|
||||||
|
@ -509,6 +510,20 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
|
||||||
case nir_intrinsic_global_atomic_fmin:
|
case nir_intrinsic_global_atomic_fmin:
|
||||||
case nir_intrinsic_global_atomic_fmax:
|
case nir_intrinsic_global_atomic_fmax:
|
||||||
case nir_intrinsic_global_atomic_fcomp_swap:
|
case nir_intrinsic_global_atomic_fcomp_swap:
|
||||||
|
case nir_intrinsic_global_atomic_add_amd:
|
||||||
|
case nir_intrinsic_global_atomic_imin_amd:
|
||||||
|
case nir_intrinsic_global_atomic_umin_amd:
|
||||||
|
case nir_intrinsic_global_atomic_imax_amd:
|
||||||
|
case nir_intrinsic_global_atomic_umax_amd:
|
||||||
|
case nir_intrinsic_global_atomic_and_amd:
|
||||||
|
case nir_intrinsic_global_atomic_or_amd:
|
||||||
|
case nir_intrinsic_global_atomic_xor_amd:
|
||||||
|
case nir_intrinsic_global_atomic_exchange_amd:
|
||||||
|
case nir_intrinsic_global_atomic_comp_swap_amd:
|
||||||
|
case nir_intrinsic_global_atomic_fadd_amd:
|
||||||
|
case nir_intrinsic_global_atomic_fmin_amd:
|
||||||
|
case nir_intrinsic_global_atomic_fmax_amd:
|
||||||
|
case nir_intrinsic_global_atomic_fcomp_swap_amd:
|
||||||
case nir_intrinsic_atomic_counter_add:
|
case nir_intrinsic_atomic_counter_add:
|
||||||
case nir_intrinsic_atomic_counter_min:
|
case nir_intrinsic_atomic_counter_min:
|
||||||
case nir_intrinsic_atomic_counter_max:
|
case nir_intrinsic_atomic_counter_max:
|
||||||
|
|
|
@ -409,6 +409,7 @@ nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr)
|
||||||
case nir_intrinsic_ssbo_atomic_xor_ir3:
|
case nir_intrinsic_ssbo_atomic_xor_ir3:
|
||||||
case nir_intrinsic_store_global:
|
case nir_intrinsic_store_global:
|
||||||
case nir_intrinsic_store_global_ir3:
|
case nir_intrinsic_store_global_ir3:
|
||||||
|
case nir_intrinsic_store_global_amd:
|
||||||
case nir_intrinsic_store_ssbo:
|
case nir_intrinsic_store_ssbo:
|
||||||
case nir_intrinsic_store_ssbo_ir3:
|
case nir_intrinsic_store_ssbo_ir3:
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -705,7 +705,8 @@ def memory_atomic_data1(name):
|
||||||
intrinsic("deref_atomic_" + name, src_comp=[-1, 1], dest_comp=1, indices=[ACCESS])
|
intrinsic("deref_atomic_" + name, src_comp=[-1, 1], dest_comp=1, indices=[ACCESS])
|
||||||
intrinsic("ssbo_atomic_" + name, src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS])
|
intrinsic("ssbo_atomic_" + name, src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS])
|
||||||
intrinsic("shared_atomic_" + name, src_comp=[1, 1], dest_comp=1, indices=[BASE])
|
intrinsic("shared_atomic_" + name, src_comp=[1, 1], dest_comp=1, indices=[BASE])
|
||||||
intrinsic("global_atomic_" + name, src_comp=[1, 1], dest_comp=1, indices=[BASE])
|
intrinsic("global_atomic_" + name, src_comp=[1, 1], dest_comp=1, indices=[])
|
||||||
|
intrinsic("global_atomic_" + name + "_amd", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
|
||||||
if not name.startswith('f'):
|
if not name.startswith('f'):
|
||||||
intrinsic("global_atomic_" + name + "_ir3", src_comp=[2, 1], dest_comp=1, indices=[BASE])
|
intrinsic("global_atomic_" + name + "_ir3", src_comp=[2, 1], dest_comp=1, indices=[BASE])
|
||||||
|
|
||||||
|
@ -713,7 +714,8 @@ def memory_atomic_data2(name):
|
||||||
intrinsic("deref_atomic_" + name, src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS])
|
intrinsic("deref_atomic_" + name, src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS])
|
||||||
intrinsic("ssbo_atomic_" + name, src_comp=[-1, 1, 1, 1], dest_comp=1, indices=[ACCESS])
|
intrinsic("ssbo_atomic_" + name, src_comp=[-1, 1, 1, 1], dest_comp=1, indices=[ACCESS])
|
||||||
intrinsic("shared_atomic_" + name, src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
|
intrinsic("shared_atomic_" + name, src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
|
||||||
intrinsic("global_atomic_" + name, src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
|
intrinsic("global_atomic_" + name, src_comp=[1, 1, 1], dest_comp=1, indices=[])
|
||||||
|
intrinsic("global_atomic_" + name + "_amd", src_comp=[1, 1, 1, 1], dest_comp=1, indices=[BASE])
|
||||||
if not name.startswith('f'):
|
if not name.startswith('f'):
|
||||||
intrinsic("global_atomic_" + name + "_ir3", src_comp=[2, 1, 1], dest_comp=1, indices=[BASE])
|
intrinsic("global_atomic_" + name + "_ir3", src_comp=[2, 1, 1], dest_comp=1, indices=[BASE])
|
||||||
|
|
||||||
|
@ -1243,6 +1245,11 @@ intrinsic("load_buffer_amd", src_comp=[4, 1, 1], dest_comp=0, indices=[BASE, IS_
|
||||||
# src[] = { store value, descriptor, base address, scalar offset }
|
# src[] = { store value, descriptor, base address, scalar offset }
|
||||||
intrinsic("store_buffer_amd", src_comp=[0, 4, 1, 1], indices=[BASE, WRITE_MASK, IS_SWIZZLED, SLC_AMD, MEMORY_MODES])
|
intrinsic("store_buffer_amd", src_comp=[0, 4, 1, 1], indices=[BASE, WRITE_MASK, IS_SWIZZLED, SLC_AMD, MEMORY_MODES])
|
||||||
|
|
||||||
|
# src[] = { address, unsigned 32-bit offset }.
|
||||||
|
load("global_amd", [1, 1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
|
||||||
|
# src[] = { value, address, unsigned 32-bit offset }.
|
||||||
|
store("global_amd", [1, 1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET, WRITE_MASK])
|
||||||
|
|
||||||
# Same as shared_atomic_add, but with GDS. src[] = {store_val, gds_addr, m0}
|
# Same as shared_atomic_add, but with GDS. src[] = {store_val, gds_addr, m0}
|
||||||
intrinsic("gds_atomic_add_amd", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
|
intrinsic("gds_atomic_add_amd", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
|
||||||
|
|
||||||
|
|
|
@ -40,19 +40,27 @@
|
||||||
#include "nir/nir_builder.h"
|
#include "nir/nir_builder.h"
|
||||||
|
|
||||||
static nir_op
|
static nir_op
|
||||||
parse_atomic_op(nir_intrinsic_op op, unsigned *offset_src, unsigned *data_src)
|
parse_atomic_op(nir_intrinsic_op op, unsigned *offset_src, unsigned *data_src,
|
||||||
|
unsigned *offset2_src)
|
||||||
{
|
{
|
||||||
switch (op) {
|
switch (op) {
|
||||||
#define OP_NOIMG(intrin, alu) \
|
#define OP_NOIMG(intrin, alu) \
|
||||||
case nir_intrinsic_ssbo_atomic_##intrin: \
|
case nir_intrinsic_ssbo_atomic_##intrin: \
|
||||||
*offset_src = 1; \
|
*offset_src = 1; \
|
||||||
*data_src = 2; \
|
*data_src = 2; \
|
||||||
|
*offset2_src = *offset_src; \
|
||||||
return nir_op_##alu; \
|
return nir_op_##alu; \
|
||||||
case nir_intrinsic_shared_atomic_##intrin: \
|
case nir_intrinsic_shared_atomic_##intrin: \
|
||||||
case nir_intrinsic_global_atomic_##intrin: \
|
case nir_intrinsic_global_atomic_##intrin: \
|
||||||
case nir_intrinsic_deref_atomic_##intrin: \
|
case nir_intrinsic_deref_atomic_##intrin: \
|
||||||
*offset_src = 0; \
|
*offset_src = 0; \
|
||||||
*data_src = 1; \
|
*data_src = 1; \
|
||||||
|
*offset2_src = *offset_src; \
|
||||||
|
return nir_op_##alu; \
|
||||||
|
case nir_intrinsic_global_atomic_##intrin##_amd: \
|
||||||
|
*offset_src = 0; \
|
||||||
|
*data_src = 1; \
|
||||||
|
*offset2_src = 2; \
|
||||||
return nir_op_##alu;
|
return nir_op_##alu;
|
||||||
#define OP(intrin, alu) \
|
#define OP(intrin, alu) \
|
||||||
OP_NOIMG(intrin, alu) \
|
OP_NOIMG(intrin, alu) \
|
||||||
|
@ -61,6 +69,7 @@ parse_atomic_op(nir_intrinsic_op op, unsigned *offset_src, unsigned *data_src)
|
||||||
case nir_intrinsic_bindless_image_atomic_##intrin: \
|
case nir_intrinsic_bindless_image_atomic_##intrin: \
|
||||||
*offset_src = 1; \
|
*offset_src = 1; \
|
||||||
*data_src = 3; \
|
*data_src = 3; \
|
||||||
|
*offset2_src = *offset_src; \
|
||||||
return nir_op_##alu;
|
return nir_op_##alu;
|
||||||
OP(add, iadd)
|
OP(add, iadd)
|
||||||
OP(imin, imin)
|
OP(imin, imin)
|
||||||
|
@ -201,7 +210,8 @@ optimize_atomic(nir_builder *b, nir_intrinsic_instr *intrin, bool return_prev)
|
||||||
{
|
{
|
||||||
unsigned offset_src = 0;
|
unsigned offset_src = 0;
|
||||||
unsigned data_src = 0;
|
unsigned data_src = 0;
|
||||||
nir_op op = parse_atomic_op(intrin->intrinsic, &offset_src, &data_src);
|
unsigned offset2_src = 0;
|
||||||
|
nir_op op = parse_atomic_op(intrin->intrinsic, &offset_src, &data_src, &offset2_src);
|
||||||
nir_ssa_def *data = intrin->src[data_src].ssa;
|
nir_ssa_def *data = intrin->src[data_src].ssa;
|
||||||
|
|
||||||
/* Separate uniform reduction and scan is faster than doing a combined scan+reduce */
|
/* Separate uniform reduction and scan is faster than doing a combined scan+reduce */
|
||||||
|
@ -285,12 +295,15 @@ opt_uniform_atomics(nir_function_impl *impl)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||||
unsigned offset_src, data_src;
|
unsigned offset_src, data_src, offset2_src;
|
||||||
if (parse_atomic_op(intrin->intrinsic, &offset_src, &data_src) == nir_num_opcodes)
|
if (parse_atomic_op(intrin->intrinsic, &offset_src, &data_src, &offset2_src) ==
|
||||||
|
nir_num_opcodes)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (nir_src_is_divergent(intrin->src[offset_src]))
|
if (nir_src_is_divergent(intrin->src[offset_src]))
|
||||||
continue;
|
continue;
|
||||||
|
if (nir_src_is_divergent(intrin->src[offset2_src]))
|
||||||
|
continue;
|
||||||
|
|
||||||
if (is_atomic_already_optimized(b.shader, intrin))
|
if (is_atomic_already_optimized(b.shader, intrin))
|
||||||
continue;
|
continue;
|
||||||
|
|
Loading…
Reference in New Issue