mirror of https://gitlab.freedesktop.org/mesa/mesa
7f6491b76d
Every nir_ssa_def is part of two chains of uses (regular instruction uses and if-uses), each implemented with a doubly linked list. That means each list requires 2 * 64-bit = 16 bytes per def, which is memory intensive. Together they require 32 bytes per def. Not cool. To cut that memory use in half, we can combine the two linked lists into a single use list that contains both regular instruction uses and if-uses. To do this, we augment the nir_src with a boolean "is_if", and reimplement the abstract if-uses operations on top of that list. That boolean should fit into the padding already in nir_src so it should not actually affect memory use, and in the future we could sneak it into the bottom bit of a pointer. However, this creates a new inefficiency: now iterating over regular uses separately from if-uses is (nominally) more expensive. It turns out virtually every caller of nir_foreach_if_use(_safe) also calls nir_foreach_use(_safe) immediately before, so we rewrite most of the callers to instead call a new single `nir_foreach_use_including_if(_safe)` which predicates the logic based on `src->is_if`. This should mitigate the performance difference. There's a bit of churn, but this is largely a mechanical set of changes. Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com> Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22343> |
||
---|---|---|
.. | ||
tests | ||
.dir-locals.el | ||
.editorconfig | ||
disasm-a3xx.c | ||
instr-a3xx.h | ||
ir3.c | ||
ir3.h | ||
ir3_a4xx.c | ||
ir3_a6xx.c | ||
ir3_array_to_ssa.c | ||
ir3_assembler.c | ||
ir3_assembler.h | ||
ir3_cf.c | ||
ir3_compiler.c | ||
ir3_compiler.h | ||
ir3_compiler_nir.c | ||
ir3_context.c | ||
ir3_context.h | ||
ir3_cp.c | ||
ir3_cse.c | ||
ir3_dce.c | ||
ir3_delay.c | ||
ir3_disk_cache.c | ||
ir3_dominance.c | ||
ir3_image.c | ||
ir3_image.h | ||
ir3_legalize.c | ||
ir3_legalize_relative.c | ||
ir3_lexer.l | ||
ir3_liveness.c | ||
ir3_lower_parallelcopy.c | ||
ir3_lower_spill.c | ||
ir3_lower_subgroups.c | ||
ir3_merge_regs.c | ||
ir3_nir.c | ||
ir3_nir.h | ||
ir3_nir_analyze_ubo_ranges.c | ||
ir3_nir_imul.py | ||
ir3_nir_lower_64b.c | ||
ir3_nir_lower_io_offsets.c | ||
ir3_nir_lower_layer_id.c | ||
ir3_nir_lower_load_barycentric_at_offset.c | ||
ir3_nir_lower_load_barycentric_at_sample.c | ||
ir3_nir_lower_tess.c | ||
ir3_nir_lower_tex_prefetch.c | ||
ir3_nir_lower_wide_load_store.c | ||
ir3_nir_move_varying_inputs.c | ||
ir3_nir_opt_preamble.c | ||
ir3_nir_trig.py | ||
ir3_parser.y | ||
ir3_postsched.c | ||
ir3_print.c | ||
ir3_ra.c | ||
ir3_ra.h | ||
ir3_ra_validate.c | ||
ir3_remove_unreachable.c | ||
ir3_sched.c | ||
ir3_shader.c | ||
ir3_shader.h | ||
ir3_spill.c | ||
ir3_validate.c | ||
meson.build |