freedreno/ir3: handle global atomics
Only for a6xx, since we don't know the instructions for global atomics on previous generations. Per Qualcomm's docs, OpenCL atomics are supported only since a5xx, together with the Generic memory space. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8717>
This commit is contained in:
parent
5d5b1fc472
commit
99388f0c27
|
@ -344,6 +344,16 @@ nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr)
|
|||
case nir_intrinsic_global_atomic_umax:
|
||||
case nir_intrinsic_global_atomic_umin:
|
||||
case nir_intrinsic_global_atomic_xor:
|
||||
case nir_intrinsic_global_atomic_add_ir3:
|
||||
case nir_intrinsic_global_atomic_and_ir3:
|
||||
case nir_intrinsic_global_atomic_comp_swap_ir3:
|
||||
case nir_intrinsic_global_atomic_exchange_ir3:
|
||||
case nir_intrinsic_global_atomic_imax_ir3:
|
||||
case nir_intrinsic_global_atomic_imin_ir3:
|
||||
case nir_intrinsic_global_atomic_or_ir3:
|
||||
case nir_intrinsic_global_atomic_umax_ir3:
|
||||
case nir_intrinsic_global_atomic_umin_ir3:
|
||||
case nir_intrinsic_global_atomic_xor_ir3:
|
||||
case nir_intrinsic_image_atomic_add:
|
||||
case nir_intrinsic_image_atomic_and:
|
||||
case nir_intrinsic_image_atomic_comp_swap:
|
||||
|
|
|
@ -659,18 +659,25 @@ intrinsic("load_vulkan_descriptor", src_comp=[-1], dest_comp=0,
|
|||
# 1: The data parameter to the atomic function (i.e. the value to add
|
||||
# in shared_atomic_add, etc).
|
||||
# 2: For CompSwap only: the second data parameter.
|
||||
#
|
||||
# IR3 global operations take 32b vec2 as memory address. IR3 doesn't support
|
||||
# float atomics.
|
||||
|
||||
def memory_atomic_data1(name):
|
||||
intrinsic("deref_atomic_" + name, src_comp=[-1, 1], dest_comp=1, indices=[ACCESS])
|
||||
intrinsic("ssbo_atomic_" + name, src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS])
|
||||
intrinsic("shared_atomic_" + name, src_comp=[1, 1], dest_comp=1, indices=[BASE])
|
||||
intrinsic("global_atomic_" + name, src_comp=[1, 1], dest_comp=1, indices=[BASE])
|
||||
if not name.startswith('f'):
|
||||
intrinsic("global_atomic_" + name + "_ir3", src_comp=[2, 1], dest_comp=1, indices=[BASE])
|
||||
|
||||
def memory_atomic_data2(name):
|
||||
intrinsic("deref_atomic_" + name, src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS])
|
||||
intrinsic("ssbo_atomic_" + name, src_comp=[-1, 1, 1, 1], dest_comp=1, indices=[ACCESS])
|
||||
intrinsic("shared_atomic_" + name, src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
|
||||
intrinsic("global_atomic_" + name, src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
|
||||
if not name.startswith('f'):
|
||||
intrinsic("global_atomic_" + name + "_ir3", src_comp=[2, 1, 1], dest_comp=1, indices=[BASE])
|
||||
|
||||
memory_atomic_data1("add")
|
||||
memory_atomic_data1("imin")
|
||||
|
|
|
@ -355,6 +355,12 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
return atomic;
|
||||
}
|
||||
|
||||
static struct ir3_instruction *
|
||||
emit_intrinsic_atomic_global(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
unreachable("Global atomic are unimplemented on A5xx");
|
||||
}
|
||||
|
||||
const struct ir3_context_funcs ir3_a4xx_funcs = {
|
||||
.emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo,
|
||||
.emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo,
|
||||
|
@ -365,4 +371,5 @@ const struct ir3_context_funcs ir3_a4xx_funcs = {
|
|||
.emit_intrinsic_image_size = emit_intrinsic_image_size_tex,
|
||||
.emit_intrinsic_load_global_ir3 = NULL,
|
||||
.emit_intrinsic_store_global_ir3 = NULL,
|
||||
.emit_intrinsic_atomic_global = emit_intrinsic_atomic_global,
|
||||
};
|
||||
|
|
|
@ -441,6 +441,73 @@ emit_intrinsic_store_global_ir3(struct ir3_context *ctx,
|
|||
stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
|
||||
}
|
||||
|
||||
static struct ir3_instruction *
|
||||
emit_intrinsic_atomic_global(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
struct ir3_block *b = ctx->block;
|
||||
struct ir3_instruction *addr, *atomic, *src1;
|
||||
struct ir3_instruction *value = ir3_get_src(ctx, &intr->src[1])[0];
|
||||
type_t type = TYPE_U32;
|
||||
|
||||
addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[0])[0],
|
||||
ir3_get_src(ctx, &intr->src[0])[1]);
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_global_atomic_comp_swap_ir3) {
|
||||
struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[2])[0];
|
||||
src1 = ir3_collect(b, compare, value);
|
||||
} else {
|
||||
src1 = value;
|
||||
}
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_global_atomic_add_ir3:
|
||||
atomic = ir3_ATOMIC_G_ADD(b, addr, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_imin_ir3:
|
||||
atomic = ir3_ATOMIC_G_MIN(b, addr, 0, src1, 0);
|
||||
type = TYPE_S32;
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_umin_ir3:
|
||||
atomic = ir3_ATOMIC_G_MIN(b, addr, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_imax_ir3:
|
||||
atomic = ir3_ATOMIC_G_MAX(b, addr, 0, src1, 0);
|
||||
type = TYPE_S32;
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_umax_ir3:
|
||||
atomic = ir3_ATOMIC_G_MAX(b, addr, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_and_ir3:
|
||||
atomic = ir3_ATOMIC_G_AND(b, addr, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_or_ir3:
|
||||
atomic = ir3_ATOMIC_G_OR(b, addr, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_xor_ir3:
|
||||
atomic = ir3_ATOMIC_G_XOR(b, addr, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_exchange_ir3:
|
||||
atomic = ir3_ATOMIC_G_XCHG(b, addr, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_comp_swap_ir3:
|
||||
atomic = ir3_ATOMIC_G_CMPXCHG(b, addr, 0, src1, 0);
|
||||
break;
|
||||
default:
|
||||
unreachable("Unknown global atomic op");
|
||||
}
|
||||
|
||||
atomic->cat6.iim_val = 1;
|
||||
atomic->cat6.d = 1;
|
||||
atomic->cat6.type = type;
|
||||
atomic->barrier_class = IR3_BARRIER_BUFFER_W;
|
||||
atomic->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
|
||||
|
||||
/* even if nothing consume the result, we can't DCE the instruction: */
|
||||
array_insert(b, b->keeps, atomic);
|
||||
|
||||
return atomic;
|
||||
}
|
||||
|
||||
const struct ir3_context_funcs ir3_a6xx_funcs = {
|
||||
.emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo,
|
||||
.emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo,
|
||||
|
@ -451,4 +518,5 @@ const struct ir3_context_funcs ir3_a6xx_funcs = {
|
|||
.emit_intrinsic_image_size = emit_intrinsic_image_size,
|
||||
.emit_intrinsic_load_global_ir3 = emit_intrinsic_load_global_ir3,
|
||||
.emit_intrinsic_store_global_ir3 = emit_intrinsic_store_global_ir3,
|
||||
.emit_intrinsic_atomic_global = emit_intrinsic_atomic_global,
|
||||
};
|
||||
|
|
|
@ -2250,6 +2250,20 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
case nir_intrinsic_bindless_resource_ir3:
|
||||
dst[0] = ir3_get_src(ctx, &intr->src[0])[0];
|
||||
break;
|
||||
case nir_intrinsic_global_atomic_add_ir3:
|
||||
case nir_intrinsic_global_atomic_imin_ir3:
|
||||
case nir_intrinsic_global_atomic_umin_ir3:
|
||||
case nir_intrinsic_global_atomic_imax_ir3:
|
||||
case nir_intrinsic_global_atomic_umax_ir3:
|
||||
case nir_intrinsic_global_atomic_and_ir3:
|
||||
case nir_intrinsic_global_atomic_or_ir3:
|
||||
case nir_intrinsic_global_atomic_xor_ir3:
|
||||
case nir_intrinsic_global_atomic_exchange_ir3:
|
||||
case nir_intrinsic_global_atomic_comp_swap_ir3: {
|
||||
dst[0] = ctx->funcs->emit_intrinsic_atomic_global(ctx, intr);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
ir3_context_error(ctx, "Unhandled intrinsic type: %s\n",
|
||||
nir_intrinsic_infos[intr->intrinsic].name);
|
||||
|
|
|
@ -188,6 +188,8 @@ struct ir3_context_funcs {
|
|||
struct ir3_instruction **dst);
|
||||
void (*emit_intrinsic_store_global_ir3)(struct ir3_context *ctx,
|
||||
nir_intrinsic_instr *intr);
|
||||
struct ir3_instruction *(*emit_intrinsic_atomic_global)(
|
||||
struct ir3_context *ctx, nir_intrinsic_instr *intr);
|
||||
};
|
||||
|
||||
extern const struct ir3_context_funcs ir3_a4xx_funcs;
|
||||
|
|
|
@ -229,9 +229,24 @@ lower_64b_global_filter(const nir_instr *instr, const void *unused)
|
|||
return false;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
return (intr->intrinsic == nir_intrinsic_load_global) ||
|
||||
(intr->intrinsic == nir_intrinsic_load_global_constant) ||
|
||||
(intr->intrinsic == nir_intrinsic_store_global);
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_global:
|
||||
case nir_intrinsic_load_global_constant:
|
||||
case nir_intrinsic_store_global:
|
||||
case nir_intrinsic_global_atomic_add:
|
||||
case nir_intrinsic_global_atomic_imin:
|
||||
case nir_intrinsic_global_atomic_umin:
|
||||
case nir_intrinsic_global_atomic_imax:
|
||||
case nir_intrinsic_global_atomic_umax:
|
||||
case nir_intrinsic_global_atomic_and:
|
||||
case nir_intrinsic_global_atomic_or:
|
||||
case nir_intrinsic_global_atomic_xor:
|
||||
case nir_intrinsic_global_atomic_exchange:
|
||||
case nir_intrinsic_global_atomic_comp_swap:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
|
@ -250,6 +265,32 @@ lower_64b_global(nir_builder *b, nir_instr *instr, void *unused)
|
|||
* those up into max 4 components per load/store.
|
||||
*/
|
||||
|
||||
#define GLOBAL_IR3_2SRC(name) \
|
||||
case nir_intrinsic_##name: { \
|
||||
return nir_build_##name##_ir3(b, nir_dest_bit_size(intr->dest), addr, \
|
||||
nir_ssa_for_src(b, intr->src[1], 1)); \
|
||||
}
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
GLOBAL_IR3_2SRC(global_atomic_add)
|
||||
GLOBAL_IR3_2SRC(global_atomic_imin)
|
||||
GLOBAL_IR3_2SRC(global_atomic_umin)
|
||||
GLOBAL_IR3_2SRC(global_atomic_imax)
|
||||
GLOBAL_IR3_2SRC(global_atomic_umax)
|
||||
GLOBAL_IR3_2SRC(global_atomic_and)
|
||||
GLOBAL_IR3_2SRC(global_atomic_or)
|
||||
GLOBAL_IR3_2SRC(global_atomic_xor)
|
||||
GLOBAL_IR3_2SRC(global_atomic_exchange)
|
||||
case nir_intrinsic_global_atomic_comp_swap:
|
||||
return nir_build_global_atomic_comp_swap_ir3(
|
||||
b, nir_dest_bit_size(intr->dest), addr,
|
||||
nir_ssa_for_src(b, intr->src[1], 1),
|
||||
nir_ssa_for_src(b, intr->src[2], 1));
|
||||
default:
|
||||
break;
|
||||
}
|
||||
#undef GLOBAL_IR3_2SRC
|
||||
|
||||
if (load) {
|
||||
unsigned num_comp = nir_intrinsic_dest_components(intr);
|
||||
nir_ssa_def *components[num_comp];
|
||||
|
|
Loading…
Reference in New Issue