freedreno/ir3: Use LDIB for coherent image loads on a5xx.
If the coherent flag is present, there must be no incoherent cache between this load and any previous stores to the image that were also decorated as coherent. isam apparently (unsurprisingly) goes through a texture cache, so use ldib instead to avoid getting the wrong result. A similar fix would be needed for a4xx, but that uses ldgb and I don't have hardware to test on. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12704>
This commit is contained in:
parent
2b6729883a
commit
1cc8523c5c
|
@ -134,9 +134,6 @@ KHR-GLES31.core.internalformat.copy_tex_image.alpha,Fail
|
|||
KHR-GLES31.core.arrays_of_arrays.InteractionFunctionCalls1,Fail
|
||||
KHR-GLES31.core.arrays_of_arrays.InteractionFunctionCalls2,Fail
|
||||
|
||||
# "Got red: 1, expected 0.00392157, at (1, 0)"
|
||||
KHR-GLES31.core.compute_shader.resource-image,Fail
|
||||
|
||||
# "../src/gallium/drivers/freedreno/a5xx/fd5_emit.c:82: fd5_emit_const_bo: Assertion `dst_off % 4 == 0' failed."
|
||||
KHR-GLES31.core.draw_indirect.advanced-twoPass-transformFeedback-arrays,Fail
|
||||
KHR-GLES31.core.draw_indirect.advanced-twoPass-transformFeedback-elements,Fail
|
||||
|
|
|
@ -2174,6 +2174,9 @@ INSTR3F(G, ATOMIC_OR)
|
|||
INSTR3F(G, ATOMIC_XOR)
|
||||
#elif GPU >= 400
|
||||
INSTR3(LDGB)
|
||||
#if GPU >= 500
|
||||
INSTR3(LDIB)
|
||||
#endif
|
||||
INSTR4NODST(STGB)
|
||||
INSTR4NODST(STIB)
|
||||
INSTR4F(G, ATOMIC_ADD)
|
||||
|
|
|
@ -24,7 +24,8 @@
|
|||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#define GPU 400
|
||||
/* 500 gets us LDIB but doesn't change any other a4xx instructions */
|
||||
#define GPU 500
|
||||
|
||||
#include "ir3_context.h"
|
||||
#include "ir3_image.h"
|
||||
|
@ -227,6 +228,32 @@ get_image_offset(struct ir3_context *ctx, const nir_intrinsic_instr *instr,
|
|||
return ir3_collect(ctx, offset, create_immed(b, 0));
|
||||
}
|
||||
|
||||
/*
 * Emit an LDIB (typed image load) instruction for a NIR image load.
 *
 * src[] = { deref, coord, sample_index }. const_index[] = {}
 *
 * The generic image-load path dispatches here through
 * ctx->funcs->emit_intrinsic_load_image for ACCESS_COHERENT loads, because
 * ISAM goes through a texture cache that is not coherent with image stores.
 *
 * ctx:  compile context; the instruction is created in ctx->block.
 * intr: the image load intrinsic. src[0] selects the image and src[1]
 *       supplies the coordinates.
 * dst:  output array handed to ir3_split_dest() together with
 *       intr->num_components (presumably one entry per loaded component).
 */
|
||||
static void
|
||||
emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
||||
struct ir3_instruction **dst)
|
||||
{
|
||||
struct ir3_block *b = ctx->block;
|
||||
struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]);
|
||||
struct ir3_instruction *ibo = ir3_image_to_ibo(ctx, intr->src[0]);
|
||||
struct ir3_instruction *offset = get_image_offset(ctx, intr, coords, true);
|
||||
unsigned ncoords = ir3_get_image_coords(intr, NULL);
|
||||
/* Component count comes from the image format, not from the intrinsic. */
unsigned ncomp =
|
||||
ir3_get_num_components_for_image_format(nir_intrinsic_format(intr));
|
||||
|
||||
/* Sources: the IBO, the computed image offset, and the collected coords. */
struct ir3_instruction *ldib = ir3_LDIB(
|
||||
b, ibo, 0, offset, 0, ir3_create_collect(ctx, coords, ncoords), 0);
|
||||
/* Destination write mask covers the components the intrinsic asks for. */
ldib->dsts[0]->wrmask = MASK(intr->num_components);
|
||||
ldib->cat6.iim_val = ncomp;
|
||||
ldib->cat6.d = ncoords;
|
||||
ldib->cat6.type = ir3_get_type_for_image_intrinsic(intr);
|
||||
ldib->cat6.typed = true;
|
||||
/* Classified as an image read that conflicts with image writes. */
ldib->barrier_class = IR3_BARRIER_IMAGE_R;
|
||||
ldib->barrier_conflict = IR3_BARRIER_IMAGE_W;
|
||||
|
||||
ir3_split_dest(b, dst, ldib, 0, intr->num_components);
|
||||
}
|
||||
|
||||
/* src[] = { index, coord, sample_index, value }. const_index[] = {} */
|
||||
static void
|
||||
emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
|
@ -332,6 +359,7 @@ const struct ir3_context_funcs ir3_a4xx_funcs = {
|
|||
.emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo,
|
||||
.emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo,
|
||||
.emit_intrinsic_atomic_ssbo = emit_intrinsic_atomic_ssbo,
|
||||
.emit_intrinsic_load_image = emit_intrinsic_load_image,
|
||||
.emit_intrinsic_store_image = emit_intrinsic_store_image,
|
||||
.emit_intrinsic_atomic_image = emit_intrinsic_atomic_image,
|
||||
.emit_intrinsic_image_size = emit_intrinsic_image_size_tex,
|
||||
|
|
|
@ -1212,7 +1212,7 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
|||
/* Coherent accesses have to go directly to memory, rather than through
|
||||
* ISAM's texture cache (which isn't coherent with image stores).
|
||||
*/
|
||||
if (nir_intrinsic_access(intr) & ACCESS_COHERENT && ctx->compiler->gen >= 6) {
|
||||
if (nir_intrinsic_access(intr) & ACCESS_COHERENT && ctx->compiler->gen >= 5) {
|
||||
ctx->funcs->emit_intrinsic_load_image(ctx, intr, dst);
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -788,9 +788,12 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st
|
|||
case nir_intrinsic_image_atomic_xor:
|
||||
case nir_intrinsic_image_atomic_exchange:
|
||||
case nir_intrinsic_image_atomic_comp_swap:
|
||||
case nir_intrinsic_image_load:
|
||||
case nir_intrinsic_image_store:
|
||||
case nir_intrinsic_image_size:
|
||||
if (compiler->gen < 6) {
|
||||
if (compiler->gen < 6 &&
|
||||
!(intr->intrinsic == nir_intrinsic_image_load &&
|
||||
!(nir_intrinsic_access(intr) & ACCESS_COHERENT))) {
|
||||
idx = nir_src_as_uint(intr->src[0]);
|
||||
if (layout->image_dims.mask & (1 << idx))
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue