From 1cc8523c5cc5730b7b38bf25424b12b3220d151e Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Thu, 2 Sep 2021 09:23:15 -0700 Subject: [PATCH] freedreno/ir3: Use LDIB for coherent image loads on a5xx. If the coherent flag is present, then we need to not have an incoherent cache between us and previous stores to the image that were also decorated as coherent. isam apparently (unsurprisingly) goes through a texture cache. Use ldib instead, so that we don't get the wrong result. We would need a similar fix for a4xx, but that uses ldgb and I don't have hardware to test on. Part-of: --- .../ci/deqp-freedreno-a530-fails.txt | 3 -- src/freedreno/ir3/ir3.h | 3 ++ src/freedreno/ir3/ir3_a4xx.c | 30 ++++++++++++++++++- src/freedreno/ir3/ir3_compiler_nir.c | 2 +- src/freedreno/ir3/ir3_nir.c | 5 +++- 5 files changed, 37 insertions(+), 6 deletions(-) diff --git a/src/freedreno/ci/deqp-freedreno-a530-fails.txt b/src/freedreno/ci/deqp-freedreno-a530-fails.txt index dd57224fbdb..8821c94ae85 100644 --- a/src/freedreno/ci/deqp-freedreno-a530-fails.txt +++ b/src/freedreno/ci/deqp-freedreno-a530-fails.txt @@ -134,9 +134,6 @@ KHR-GLES31.core.internalformat.copy_tex_image.alpha,Fail KHR-GLES31.core.arrays_of_arrays.InteractionFunctionCalls1,Fail KHR-GLES31.core.arrays_of_arrays.InteractionFunctionCalls2,Fail -# "Got red: 1, expected 0.00392157, at (1, 0)" -KHR-GLES31.core.compute_shader.resource-image,Fail - # "../src/gallium/drivers/freedreno/a5xx/fd5_emit.c:82: fd5_emit_const_bo: Assertion `dst_off % 4 == 0' failed." KHR-GLES31.core.draw_indirect.advanced-twoPass-transformFeedback-arrays,Fail KHR-GLES31.core.draw_indirect.advanced-twoPass-transformFeedback-elements,Fail diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 6c2d8c7553f..b767fa14f0b 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -2174,6 +2174,9 @@ INSTR3F(G, ATOMIC_OR) INSTR3F(G, ATOMIC_XOR) #elif GPU >= 400 INSTR3(LDGB) +#if GPU >= 500 +INSTR3(LDIB) +#endif INSTR4NODST(STGB) INSTR4NODST(STIB) INSTR4F(G, ATOMIC_ADD) diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c index 820a692f091..730323cc0d0 100644 --- a/src/freedreno/ir3/ir3_a4xx.c +++ b/src/freedreno/ir3/ir3_a4xx.c @@ -24,7 +24,8 @@ * Rob Clark */ -#define GPU 400 +/* 500 gets us LDIB but doesn't change any other a4xx instructions */ +#define GPU 500 #include "ir3_context.h" #include "ir3_image.h" @@ -227,6 +228,32 @@ get_image_offset(struct ir3_context *ctx, const nir_intrinsic_instr *instr, return ir3_collect(ctx, offset, create_immed(b, 0)); } +/* src[] = { deref, coord, sample_index }. const_index[] = {} */ +static void +emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]); + struct ir3_instruction *ibo = ir3_image_to_ibo(ctx, intr->src[0]); + struct ir3_instruction *offset = get_image_offset(ctx, intr, coords, true); + unsigned ncoords = ir3_get_image_coords(intr, NULL); + unsigned ncomp = + ir3_get_num_components_for_image_format(nir_intrinsic_format(intr)); + + struct ir3_instruction *ldib = ir3_LDIB( + b, ibo, 0, offset, 0, ir3_create_collect(ctx, coords, ncoords), 0); + ldib->dsts[0]->wrmask = MASK(intr->num_components); + ldib->cat6.iim_val = ncomp; + ldib->cat6.d = ncoords; + ldib->cat6.type = ir3_get_type_for_image_intrinsic(intr); + ldib->cat6.typed = true; + ldib->barrier_class = IR3_BARRIER_IMAGE_R; + ldib->barrier_conflict = IR3_BARRIER_IMAGE_W; + + ir3_split_dest(b, dst, ldib, 0, intr->num_components); +} + /* src[] = { index, coord, sample_index, value }. const_index[] = {} */ static void emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) @@ -332,6 +359,7 @@ const struct ir3_context_funcs ir3_a4xx_funcs = { .emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo, .emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo, .emit_intrinsic_atomic_ssbo = emit_intrinsic_atomic_ssbo, + .emit_intrinsic_load_image = emit_intrinsic_load_image, .emit_intrinsic_store_image = emit_intrinsic_store_image, .emit_intrinsic_atomic_image = emit_intrinsic_atomic_image, .emit_intrinsic_image_size = emit_intrinsic_image_size_tex, diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index e6a1c0c289c..4157e417e08 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -1212,7 +1212,7 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr, /* Coherent accesses have to go directly to memory, rather than through * ISAM's texture cache (which isn't coherent with image stores). */ - if (nir_intrinsic_access(intr) & ACCESS_COHERENT && ctx->compiler->gen >= 6) { + if (nir_intrinsic_access(intr) & ACCESS_COHERENT && ctx->compiler->gen >= 5) { ctx->funcs->emit_intrinsic_load_image(ctx, intr, dst); return; } diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 4739d9e661b..b381c93606f 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -788,9 +788,12 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st case nir_intrinsic_image_atomic_xor: case nir_intrinsic_image_atomic_exchange: case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_load: case nir_intrinsic_image_store: case nir_intrinsic_image_size: - if (compiler->gen < 6) { + if (compiler->gen < 6 && + !(intr->intrinsic == nir_intrinsic_image_load && + !(nir_intrinsic_access(intr) & ACCESS_COHERENT))) { idx = nir_src_as_uint(intr->src[0]); if (layout->image_dims.mask & (1 << idx)) break;