pan/midg: Add a pass to lower non-logbase2 global/shared loads

Compute shaders might do vec3 (3 * X bit) loads, which the midgard
compiler translates to LD.<next_pow2(3 * X)>. This can cause
out-of-bounds accesses, potentially leading to page faults if the
access lands at the end of a BO. One solution to avoid that (maybe not
the best) is to split non-power-of-two loads so we only read what's
requested.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14885>
This commit is contained in:
Boris Brezillon 2021-06-09 10:01:51 +02:00 committed by Alyssa Rosenzweig
parent 3f9bce08e1
commit 59ea6e2e27
2 changed files with 70 additions and 3 deletions

View File

@ -46,11 +46,8 @@ dEQP-GLES31.functional.separate_shader.random.68,Fail
dEQP-GLES31.functional.separate_shader.random.79,Fail
dEQP-GLES31.functional.separate_shader.random.80,Fail
dEQP-GLES31.functional.separate_shader.random.89,Fail
dEQP-GLES31.functional.shaders.builtin_functions.integer.findlsb.ivec3_lowp_compute,Fail
dEQP-GLES31.functional.shaders.builtin_functions.integer.findmsb.ivec2_lowp_compute,Fail
dEQP-GLES31.functional.shaders.builtin_functions.integer.findmsb.ivec3_lowp_compute,Fail
dEQP-GLES31.functional.shaders.builtin_functions.integer.findmsb.uvec2_lowp_compute,Fail
dEQP-GLES31.functional.shaders.builtin_functions.integer.findmsb.uvec3_lowp_compute,Fail
dEQP-GLES31.functional.shaders.builtin_functions.integer.imulextended.ivec3_highp_fragment,Fail
dEQP-GLES31.functional.shaders.builtin_functions.integer.umulextended.uvec3_highp_fragment,Fail
dEQP-GLES31.functional.synchronization.in_invocation.image_alias_overwrite,Crash

View File

@ -245,6 +245,74 @@ midgard_nir_lower_fdot2(nir_shader *shader)
NULL);
}
/*
 * Per-instruction callback for midgard_nir_lower_global_load().
 *
 * Splits a global or shared load whose total size in bits is not a
 * power of two (or exceeds 128 bits) into a sequence of smaller
 * power-of-two-sized loads, so the hardware never reads past the end
 * of the requested region (which could fault at the end of a BO).
 *
 * \param b     builder positioned by this function before \p instr
 * \param instr candidate instruction (any type; filtered below)
 * \param data  unused pass data
 * \return true if the load was lowered, false if left untouched
 */
static bool
midgard_nir_lower_global_load_instr(nir_builder *b, nir_instr *instr, void *data)
{
        if (instr->type != nir_instr_type_intrinsic)
                return false;

        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

        /* Despite the pass name, both global and shared loads are
         * handled here; everything else passes through unchanged. */
        if (intr->intrinsic != nir_intrinsic_load_global &&
            intr->intrinsic != nir_intrinsic_load_shared)
                return false;

        unsigned compsz = nir_dest_bit_size(intr->dest);
        unsigned totalsz = compsz * nir_dest_num_components(intr->dest);
        /* 8, 16, 32, 64 and 128 bit loads don't need to be lowered */
        if (util_bitcount(totalsz) < 2 && totalsz <= 128)
                return false;

        /* Emit replacement loads right before the original one. */
        b->cursor = nir_before_instr(instr);

        assert(intr->src[0].is_ssa);
        nir_ssa_def *addr = intr->src[0].ssa;

        nir_ssa_def *comps[MIR_VEC_COMPONENTS];
        unsigned ncomps = 0;

        /* Greedily peel off the largest power-of-two load (capped at
         * 128 bits) that fits in the remaining size, scalarize its
         * result, then advance the address/offset by the bytes read.
         * Assumes compsz is a power of two dividing totalsz, so loadsz
         * is always a whole number of components — TODO confirm NIR
         * guarantees this for these intrinsics. */
        while (totalsz) {
                unsigned loadsz = MIN2(1 << (util_last_bit(totalsz) - 1), 128);
                unsigned loadncomps = loadsz / compsz;

                nir_ssa_def *load;
                if (intr->intrinsic == nir_intrinsic_load_global) {
                        load = nir_load_global(b, addr, compsz / 8, loadncomps, compsz);
                } else {
                        assert(intr->intrinsic == nir_intrinsic_load_shared);
                        /* No builder helper used here: build the shared
                         * load by hand, carrying over the original base
                         * and a conservative component-sized alignment. */
                        nir_intrinsic_instr *shared_load =
                                nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_shared);
                        shared_load->num_components = loadncomps;
                        shared_load->src[0] = nir_src_for_ssa(addr);
                        nir_intrinsic_set_align(shared_load, compsz / 8, 0);
                        nir_intrinsic_set_base(shared_load, nir_intrinsic_base(intr));
                        nir_ssa_dest_init(&shared_load->instr, &shared_load->dest,
                                          shared_load->num_components, compsz, NULL);
                        nir_builder_instr_insert(b, &shared_load->instr);
                        load = &shared_load->dest.ssa;
                }

                /* Collect the scalar components for the final vector. */
                for (unsigned i = 0; i < loadncomps; i++)
                        comps[ncomps++] = nir_channel(b, load, i);

                totalsz -= loadsz;
                addr = nir_iadd(b, addr, nir_imm_intN_t(b, loadsz / 8, addr->bit_size));
        }

        assert(ncomps == nir_dest_num_components(intr->dest));

        /* Re-vectorize and redirect all users to the split loads.
         * NOTE(review): the original load is left in place with no
         * remaining uses — presumably cleaned up by a later DCE pass;
         * verify that assumption holds in this pass ordering. */
        nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, comps, ncomps));
        return true;
}
/*
 * Run the non-power-of-two global/shared load lowering over every
 * instruction in the shader. Returns true if any load was split.
 */
static bool
midgard_nir_lower_global_load(nir_shader *shader)
{
        /* Control flow is untouched, so block indices and dominance
         * information remain valid. */
        bool progress =
                nir_shader_instructions_pass(shader,
                                             midgard_nir_lower_global_load_instr,
                                             nir_metadata_block_index |
                                             nir_metadata_dominance,
                                             NULL);
        return progress;
}
static bool
mdg_is_64(const nir_instr *instr, const void *_unused)
{
@ -3101,6 +3169,8 @@ midgard_compile_shader_nir(nir_shader *nir,
NIR_PASS_V(nir, pan_nir_lower_64bit_intrin);
NIR_PASS_V(nir, midgard_nir_lower_global_load);
/* Optimisation passes */
optimise_nir(nir, ctx->quirks, inputs->is_blend);