diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index ce64db72780..c23d3afd896 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1865,6 +1865,7 @@ bool ir3_lower_subgroups(struct ir3 *ir); /* legalize: */ bool ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary); +bool ir3_legalize_relative(struct ir3 *ir); static inline bool ir3_has_latency_to_hide(struct ir3 *ir) diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index d43f44feb52..45f6c80bf6d 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -4892,6 +4892,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, IR3_PASS(ir, ir3_postsched, so); + IR3_PASS(ir, ir3_legalize_relative); IR3_PASS(ir, ir3_lower_subgroups); if (so->type == MESA_SHADER_FRAGMENT) diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index e43889d7f75..6939ce5eee7 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -39,7 +39,7 @@ * 1) Iteratively determine where sync ((sy)/(ss)) flags are needed, * based on state flowing out of predecessor blocks until there is * no further change. In some cases this requires inserting nops. 
- * 2) Mark (ei) on last varying input, and (ul) on last use of a0.x + * 2) Mark (ei) on last varying input * 3) Final nop scheduling for instruction latency * 4) Resolve jumps and schedule blocks, marking potential convergence * points with (jp) @@ -88,7 +88,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) if (bd->valid) return false; - struct ir3_instruction *last_rel = NULL; struct ir3_instruction *last_n = NULL; struct list_head instr_list; struct ir3_legalize_state prev_state = bd->state; @@ -207,13 +206,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) regmask_init(&state->needs_sy, mergedregs); } } - - /* TODO: is it valid to have address reg loaded from a - * relative src (ie. mova a0, c)? If so, the - * last_rel check below should be moved ahead of this: - */ - if (reg->flags & IR3_REG_RELATIV) - last_rel = n; } foreach_dst (reg, n) { @@ -223,11 +215,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) regmask_init(&state->needs_ss_war, mergedregs); regmask_init(&state->needs_ss, mergedregs); } - - if (last_rel && (reg->num == regid(REG_A0, 0))) { - last_rel->flags |= IR3_INSTR_UL; - last_rel = NULL; - } } /* cat5+ does not have an (ss) bit, if needed we need to @@ -367,9 +354,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) list_add(&baryf->node, &block->instr_list); } - if (last_rel) - last_rel->flags |= IR3_INSTR_UL; - bd->valid = true; if (memcmp(&prev_state, state, sizeof(*state))) { diff --git a/src/freedreno/ir3/ir3_legalize_relative.c b/src/freedreno/ir3/ir3_legalize_relative.c new file mode 100644 index 00000000000..8e478a89c00 --- /dev/null +++ b/src/freedreno/ir3/ir3_legalize_relative.c @@ -0,0 +1,69 @@ +/* + * Copyright 2022 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "ir3.h" + +/* + * Mark (ul) on the last user of a0 before a0 is loaded again. emit_block + * makes sure a0 is loaded first if there is any user in the block. 
This + * allows us to process each block independently. + * + * Note that this must be called before passes that break the assumption, such + * as ir3_lower_subgroups. + */ + +static bool +is_reg_relative(const struct ir3_instruction *instr) +{ + foreach_dst (reg, instr) { + if (reg->flags & IR3_REG_RELATIV) + return true; + } + + foreach_src (reg, instr) { + if (reg->flags & IR3_REG_RELATIV) + return true; + } + + return false; +} + +static bool +is_dst_a0(const struct ir3_instruction *instr) +{ + foreach_dst (reg, instr) { + if (reg->num == regid(REG_A0, 0)) + return true; + } + + return false; +} + +bool +ir3_legalize_relative(struct ir3 *ir) +{ + foreach_block (block, &ir->block_list) { + struct ir3_instruction *last_user = NULL; + + foreach_instr (instr, &block->instr_list) { + if (is_reg_relative(instr)) + last_user = instr; + + /* Is it valid to have address reg loaded from a relative src (ie. + * a mova to a0 whose src is itself a0-relative)? If so, the load is its + * own last user and gets marked (ul), which may or may not be valid. + */ + if (last_user && is_dst_a0(instr)) { + last_user->flags |= IR3_INSTR_UL; + last_user = NULL; + } + } + + if (last_user) + last_user->flags |= IR3_INSTR_UL; + } + + return true; +} diff --git a/src/freedreno/ir3/meson.build b/src/freedreno/ir3/meson.build index 7dc641bbe83..0f6e1900a5e 100644 --- a/src/freedreno/ir3/meson.build +++ b/src/freedreno/ir3/meson.build @@ -85,6 +85,7 @@ libfreedreno_ir3_files = files( 'ir3_image.h', 'ir3.h', 'ir3_legalize.c', + 'ir3_legalize_relative.c', 'ir3_liveness.c', 'ir3_lower_parallelcopy.c', 'ir3_lower_spill.c',