diff --git a/src/panfrost/ci/panfrost-t860-fails.txt b/src/panfrost/ci/panfrost-t860-fails.txt index f0d2eee30ce..2d72c928bb4 100644 --- a/src/panfrost/ci/panfrost-t860-fails.txt +++ b/src/panfrost/ci/panfrost-t860-fails.txt @@ -46,11 +46,8 @@ dEQP-GLES31.functional.separate_shader.random.68,Fail dEQP-GLES31.functional.separate_shader.random.79,Fail dEQP-GLES31.functional.separate_shader.random.80,Fail dEQP-GLES31.functional.separate_shader.random.89,Fail -dEQP-GLES31.functional.shaders.builtin_functions.integer.findlsb.ivec3_lowp_compute,Fail dEQP-GLES31.functional.shaders.builtin_functions.integer.findmsb.ivec2_lowp_compute,Fail -dEQP-GLES31.functional.shaders.builtin_functions.integer.findmsb.ivec3_lowp_compute,Fail dEQP-GLES31.functional.shaders.builtin_functions.integer.findmsb.uvec2_lowp_compute,Fail -dEQP-GLES31.functional.shaders.builtin_functions.integer.findmsb.uvec3_lowp_compute,Fail dEQP-GLES31.functional.shaders.builtin_functions.integer.imulextended.ivec3_highp_fragment,Fail dEQP-GLES31.functional.shaders.builtin_functions.integer.umulextended.uvec3_highp_fragment,Fail dEQP-GLES31.functional.synchronization.in_invocation.image_alias_overwrite,Crash diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 2a0bee92f6a..7e979df7e55 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -245,6 +245,74 @@ midgard_nir_lower_fdot2(nir_shader *shader) NULL); } +static bool +midgard_nir_lower_global_load_instr(nir_builder *b, nir_instr *instr, void *data) +{ /* Per-instruction callback: splits a load_global or load_shared into several smaller loads unless its total size is a single power of two of at most 128 bits, then recombines the pieces into one vector. Returns true when the load was rewritten, false otherwise; data is unused. */ + if (instr->type != nir_instr_type_intrinsic) + return false; + /* Only load_global and load_shared intrinsics are candidates */ + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_load_global && + intr->intrinsic != nir_intrinsic_load_shared) + return false; + /* Width of one destination component and of the whole destination, both in bits */ + unsigned compsz = nir_dest_bit_size(intr->dest); + unsigned totalsz = compsz * nir_dest_num_components(intr->dest); + /* Loads whose total size is a single power of two of at most 128 bits (8, 16, 32, 64 or 128) don't need to be 
lowered; anything else, such as a 96-bit vec3 of 32-bit components, is split below */ + if (util_bitcount(totalsz) < 2 && totalsz <= 128) + return false; + /* The replacement loads are emitted immediately before the original instruction */ + b->cursor = nir_before_instr(instr); + /* src[0] is the address; it is advanced after every partial load */ + assert(intr->src[0].is_ssa); + nir_ssa_def *addr = intr->src[0].ssa; + /* Scalar components gathered from all partial loads, in load order */ + nir_ssa_def *comps[MIR_VEC_COMPONENTS]; + unsigned ncomps = 0; + /* One chunk per iteration: MIN2(1 << (util_last_bit(totalsz) - 1), 128) bits, presumably the largest power of two that fits in the remaining size, capped at 128 */ + while (totalsz) { + unsigned loadsz = MIN2(1 << (util_last_bit(totalsz) - 1), 128); + unsigned loadncomps = loadsz / compsz; + /* The partial load uses the same intrinsic as the original */ + nir_ssa_def *load; + if (intr->intrinsic == nir_intrinsic_load_global) { + load = nir_load_global(b, addr, compsz / 8, loadncomps, compsz); + } else { /* load_shared is built by hand: alignment is the component size in bytes, base is copied from the original */ + assert(intr->intrinsic == nir_intrinsic_load_shared); + nir_intrinsic_instr *shared_load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_shared); + shared_load->num_components = loadncomps; + shared_load->src[0] = nir_src_for_ssa(addr); + nir_intrinsic_set_align(shared_load, compsz / 8, 0); + nir_intrinsic_set_base(shared_load, nir_intrinsic_base(intr)); + nir_ssa_dest_init(&shared_load->instr, &shared_load->dest, + shared_load->num_components, compsz, NULL); + nir_builder_instr_insert(b, &shared_load->instr); + load = &shared_load->dest.ssa; + } + /* Append every channel of this chunk to the component list */ + for (unsigned i = 0; i < loadncomps; i++) + comps[ncomps++] = nir_channel(b, load, i); + /* Account for the bits just loaded and step the address by the same amount in bytes */ + totalsz -= loadsz; + addr = nir_iadd(b, addr, nir_imm_intN_t(b, loadsz / 8, addr->bit_size)); + } + /* Rebuild a vector with the original component count and point all users at it; the original load itself is not removed here */ + assert(ncomps == nir_dest_num_components(intr->dest)); + nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, comps, ncomps)); + + return true; +} + /* Shader-level entry point: hands midgard_nir_lower_global_load_instr to nir_shader_instructions_pass with nir_metadata_block_index | nir_metadata_dominance and returns its result */ +static bool +midgard_nir_lower_global_load(nir_shader *shader) +{ + return nir_shader_instructions_pass(shader, + midgard_nir_lower_global_load_instr, + nir_metadata_block_index | nir_metadata_dominance, + NULL); +} + static bool mdg_is_64(const nir_instr *instr, const void *_unused) { @@ -3101,6 +3169,8 @@ midgard_compile_shader_nir(nir_shader *nir, NIR_PASS_V(nir, pan_nir_lower_64bit_intrin); + NIR_PASS_V(nir, midgard_nir_lower_global_load); /* placed after pan_nir_lower_64bit_intrin and before optimise_nir */ + /* Optimisation passes */ optimise_nir(nir, ctx->quirks, inputs->is_blend);