freedreno/ir3: Use LDIB for coherent image loads on a5xx.

If the coherent flag is present, there must not be an incoherent cache
between us and previous stores to the image that were also decorated as
coherent.  isam apparently (unsurprisingly) goes through a texture cache.
Use ldib instead, so that we don't get the wrong result.

We would need a similar fix for a4xx, but that uses ldgb and I don't
have hardware to test on.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12704>
This commit is contained in:
Emma Anholt 2021-09-02 09:23:15 -07:00 committed by Marge Bot
parent 2b6729883a
commit 1cc8523c5c
5 changed files with 37 additions and 6 deletions

View File

@ -134,9 +134,6 @@ KHR-GLES31.core.internalformat.copy_tex_image.alpha,Fail
KHR-GLES31.core.arrays_of_arrays.InteractionFunctionCalls1,Fail
KHR-GLES31.core.arrays_of_arrays.InteractionFunctionCalls2,Fail
# "Got red: 1, expected 0.00392157, at (1, 0)"
KHR-GLES31.core.compute_shader.resource-image,Fail
# "../src/gallium/drivers/freedreno/a5xx/fd5_emit.c:82: fd5_emit_const_bo: Assertion `dst_off % 4 == 0' failed."
KHR-GLES31.core.draw_indirect.advanced-twoPass-transformFeedback-arrays,Fail
KHR-GLES31.core.draw_indirect.advanced-twoPass-transformFeedback-elements,Fail

View File

@ -2174,6 +2174,9 @@ INSTR3F(G, ATOMIC_OR)
INSTR3F(G, ATOMIC_XOR)
#elif GPU >= 400
INSTR3(LDGB)
#if GPU >= 500
INSTR3(LDIB)
#endif
INSTR4NODST(STGB)
INSTR4NODST(STIB)
INSTR4F(G, ATOMIC_ADD)

View File

@ -24,7 +24,8 @@
* Rob Clark <robclark@freedesktop.org>
*/
#define GPU 400
/* 500 gets us LDIB but doesn't change any other a4xx instructions */
#define GPU 500
#include "ir3_context.h"
#include "ir3_image.h"
@ -227,6 +228,32 @@ get_image_offset(struct ir3_context *ctx, const nir_intrinsic_instr *instr,
return ir3_collect(ctx, offset, create_immed(b, 0));
}
/*
 * Emit an LDIB instruction for an image-load intrinsic and split its result
 * into the per-component entries of dst.
 *
 * src[] = { deref, coord, sample_index }. const_index[] = {}
 */
static void
emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                          struct ir3_instruction **dst)
{
   struct ir3_block *block = ctx->block;
   const unsigned num_dst_comps = intr->num_components;

   /* Gather the operands: coordinates, the image (IBO) and its offset. */
   struct ir3_instruction *const *coord_srcs =
      ir3_get_src(ctx, &intr->src[1]);
   struct ir3_instruction *image = ir3_image_to_ibo(ctx, intr->src[0]);
   struct ir3_instruction *img_offset =
      get_image_offset(ctx, intr, coord_srcs, true);

   const unsigned num_coords = ir3_get_image_coords(intr, NULL);
   const unsigned format_comps =
      ir3_get_num_components_for_image_format(nir_intrinsic_format(intr));

   /* The coordinates are passed to LDIB as a single collected source. */
   struct ir3_instruction *coord_vec =
      ir3_create_collect(ctx, coord_srcs, num_coords);

   struct ir3_instruction *load =
      ir3_LDIB(block, image, 0, img_offset, 0, coord_vec, 0);

   /* Only the components requested by the intrinsic are written. */
   load->dsts[0]->wrmask = MASK(num_dst_comps);

   /* cat6 encoding: component count from the image format, coordinate
    * count, and the access type derived from the intrinsic.
    */
   load->cat6.iim_val = format_comps;
   load->cat6.d = num_coords;
   load->cat6.type = ir3_get_type_for_image_intrinsic(intr);
   load->cat6.typed = true;

   /* An image read, which conflicts with image writes. */
   load->barrier_class = IR3_BARRIER_IMAGE_R;
   load->barrier_conflict = IR3_BARRIER_IMAGE_W;

   ir3_split_dest(block, dst, load, 0, num_dst_comps);
}
/* src[] = { index, coord, sample_index, value }. const_index[] = {} */
static void
emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
@ -332,6 +359,7 @@ const struct ir3_context_funcs ir3_a4xx_funcs = {
.emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo,
.emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo,
.emit_intrinsic_atomic_ssbo = emit_intrinsic_atomic_ssbo,
.emit_intrinsic_load_image = emit_intrinsic_load_image,
.emit_intrinsic_store_image = emit_intrinsic_store_image,
.emit_intrinsic_atomic_image = emit_intrinsic_atomic_image,
.emit_intrinsic_image_size = emit_intrinsic_image_size_tex,

View File

@ -1212,7 +1212,7 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
/* Coherent accesses have to go directly to memory, rather than through
* ISAM's texture cache (which isn't coherent with image stores).
*/
if (nir_intrinsic_access(intr) & ACCESS_COHERENT && ctx->compiler->gen >= 6) {
if (nir_intrinsic_access(intr) & ACCESS_COHERENT && ctx->compiler->gen >= 5) {
ctx->funcs->emit_intrinsic_load_image(ctx, intr, dst);
return;
}

View File

@ -788,9 +788,12 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_image_atomic_comp_swap:
case nir_intrinsic_image_load:
case nir_intrinsic_image_store:
case nir_intrinsic_image_size:
if (compiler->gen < 6) {
if (compiler->gen < 6 &&
!(intr->intrinsic == nir_intrinsic_image_load &&
!(nir_intrinsic_access(intr) & ACCESS_COHERENT))) {
idx = nir_src_as_uint(intr->src[0]);
if (layout->image_dims.mask & (1 << idx))
break;