pan/midg: Add a pass to lower non-logbase2 global/shared loads
Compute shaders might do vec3(Xbits) loads, which the midgard compiler translates to LD.<next_pow2(3 * X)>. This might cause out-of-bounds accesses, potentially leading to page faults if the access is at the end of a BO. One solution to avoid that (maybe not the best) is to split non-log2 loads so we only read what's requested.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14885>
parent 3f9bce08e1
commit 59ea6e2e27
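As a worked illustration of the splitting scheme described in the commit message (not part of the commit; a standalone sketch that re-implements util_last_bit() so it compiles on its own):

#include <stdio.h>

/* Stand-in for Mesa's util_last_bit(): 1-based index of the most
 * significant set bit, 0 when x == 0. */
static unsigned
last_bit(unsigned x)
{
        unsigned n = 0;
        while (x) {
                n++;
                x >>= 1;
        }
        return n;
}

int
main(void)
{
        unsigned totalsz = 96; /* vec3 of 32-bit components */

        printf("splitting a %u-bit load:", totalsz);
        while (totalsz) {
                /* Largest power of two <= the remaining size, capped at
                 * the 128-bit maximum a single load can cover. */
                unsigned loadsz = 1u << (last_bit(totalsz) - 1);
                if (loadsz > 128)
                        loadsz = 128;
                printf(" LD.%u", loadsz);
                totalsz -= loadsz;
        }
        printf("\n"); /* prints: splitting a 96-bit load: LD.64 LD.32 */
        return 0;
}

A vec4 of 32-bit components (128 bits) is left untouched, while the vec3 case above becomes one 64-bit load followed by one 32-bit load, so nothing past the requested 96 bits is ever read.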
@@ -46,11 +46,8 @@ dEQP-GLES31.functional.separate_shader.random.68,Fail
 dEQP-GLES31.functional.separate_shader.random.79,Fail
 dEQP-GLES31.functional.separate_shader.random.80,Fail
 dEQP-GLES31.functional.separate_shader.random.89,Fail
-dEQP-GLES31.functional.shaders.builtin_functions.integer.findlsb.ivec3_lowp_compute,Fail
 dEQP-GLES31.functional.shaders.builtin_functions.integer.findmsb.ivec2_lowp_compute,Fail
-dEQP-GLES31.functional.shaders.builtin_functions.integer.findmsb.ivec3_lowp_compute,Fail
 dEQP-GLES31.functional.shaders.builtin_functions.integer.findmsb.uvec2_lowp_compute,Fail
-dEQP-GLES31.functional.shaders.builtin_functions.integer.findmsb.uvec3_lowp_compute,Fail
 dEQP-GLES31.functional.shaders.builtin_functions.integer.imulextended.ivec3_highp_fragment,Fail
 dEQP-GLES31.functional.shaders.builtin_functions.integer.umulextended.uvec3_highp_fragment,Fail
 dEQP-GLES31.functional.synchronization.in_invocation.image_alias_overwrite,Crash
@@ -245,6 +245,74 @@ midgard_nir_lower_fdot2(nir_shader *shader)
                                       NULL);
 }
 
+static bool
+midgard_nir_lower_global_load_instr(nir_builder *b, nir_instr *instr, void *data)
+{
+        if (instr->type != nir_instr_type_intrinsic)
+                return false;
+
+        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+        if (intr->intrinsic != nir_intrinsic_load_global &&
+            intr->intrinsic != nir_intrinsic_load_shared)
+                return false;
+
+        unsigned compsz = nir_dest_bit_size(intr->dest);
+        unsigned totalsz = compsz * nir_dest_num_components(intr->dest);
+        /* 8, 16, 32, 64 and 128 bit loads don't need to be lowered */
+        if (util_bitcount(totalsz) < 2 && totalsz <= 128)
+                return false;
+
+        b->cursor = nir_before_instr(instr);
+
+        assert(intr->src[0].is_ssa);
+        nir_ssa_def *addr = intr->src[0].ssa;
+
+        nir_ssa_def *comps[MIR_VEC_COMPONENTS];
+        unsigned ncomps = 0;
+
+        while (totalsz) {
+                unsigned loadsz = MIN2(1 << (util_last_bit(totalsz) - 1), 128);
+                unsigned loadncomps = loadsz / compsz;
+
+                nir_ssa_def *load;
+                if (intr->intrinsic == nir_intrinsic_load_global) {
+                        load = nir_load_global(b, addr, compsz / 8, loadncomps, compsz);
+                } else {
+                        assert(intr->intrinsic == nir_intrinsic_load_shared);
+                        nir_intrinsic_instr *shared_load =
+                                nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_shared);
+                        shared_load->num_components = loadncomps;
+                        shared_load->src[0] = nir_src_for_ssa(addr);
+                        nir_intrinsic_set_align(shared_load, compsz / 8, 0);
+                        nir_intrinsic_set_base(shared_load, nir_intrinsic_base(intr));
+                        nir_ssa_dest_init(&shared_load->instr, &shared_load->dest,
+                                          shared_load->num_components, compsz, NULL);
+                        nir_builder_instr_insert(b, &shared_load->instr);
+                        load = &shared_load->dest.ssa;
+                }
+
+                for (unsigned i = 0; i < loadncomps; i++)
+                        comps[ncomps++] = nir_channel(b, load, i);
+
+                totalsz -= loadsz;
+                addr = nir_iadd(b, addr, nir_imm_intN_t(b, loadsz / 8, addr->bit_size));
+        }
+
+        assert(ncomps == nir_dest_num_components(intr->dest));
+        nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, comps, ncomps));
+
+        return true;
+}
+
+static bool
+midgard_nir_lower_global_load(nir_shader *shader)
+{
+        return nir_shader_instructions_pass(shader,
+                                            midgard_nir_lower_global_load_instr,
+                                            nir_metadata_block_index | nir_metadata_dominance,
+                                            NULL);
+}
+
 static bool
 mdg_is_64(const nir_instr *instr, const void *_unused)
 {
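The early-out test in the pass, util_bitcount(totalsz) < 2 && totalsz <= 128, accepts exactly the sizes a single load can cover: zero or a power of two, no wider than 128 bits. A quick standalone check of that predicate (illustrative only; util_bitcount is re-implemented here so the snippet compiles on its own):

#include <assert.h>
#include <stdbool.h>

/* Stand-in for Mesa's util_bitcount(): number of set bits. */
static unsigned
bitcount(unsigned x)
{
        unsigned n = 0;
        while (x) {
                n += x & 1;
                x >>= 1;
        }
        return n;
}

static bool
needs_lowering(unsigned totalsz)
{
        return !(bitcount(totalsz) < 2 && totalsz <= 128);
}

int
main(void)
{
        assert(!needs_lowering(32));  /* scalar 32-bit: already legal */
        assert(!needs_lowering(128)); /* vec4 x 32-bit: already legal */
        assert(needs_lowering(96));   /* vec3 x 32-bit: gets split */
        assert(needs_lowering(24));   /* vec3 x 8-bit: gets split */
        assert(needs_lowering(256));  /* vec4 x 64-bit: wider than 128, gets split */
        return 0;
}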
@@ -3101,6 +3169,8 @@ midgard_compile_shader_nir(nir_shader *nir,
 
         NIR_PASS_V(nir, pan_nir_lower_64bit_intrin);
 
+        NIR_PASS_V(nir, midgard_nir_lower_global_load);
+
         /* Optimisation passes */
 
         optimise_nir(nir, ctx->quirks, inputs->is_blend);