From 9df3323564d5db9fba6885b97d206dbb5d90f7e2 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 11 Apr 2024 16:00:06 -0400 Subject: [PATCH] ir3/legalize: Fix intra-block state propagation with loops At the beginning of each block we were taking the state for the current block, which after traversing the block already will contain the state at the *end* of the block, and combining it with the predecessors to get the state for the *start* of the block. This will cause over-synchronization. Part-of: --- src/freedreno/ir3/ir3_legalize.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index f843d78baef12..53e9c51fd49d2 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -63,6 +63,7 @@ struct ir3_legalize_state { struct ir3_legalize_block_data { bool valid; + struct ir3_legalize_state begin_state; struct ir3_legalize_state state; }; @@ -112,12 +113,20 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) struct ir3_instruction *last_n = NULL; struct list_head instr_list; struct ir3_legalize_state prev_state = bd->state; - struct ir3_legalize_state *state = &bd->state; + struct ir3_legalize_state *state = &bd->begin_state; bool last_input_needs_ss = false; bool has_tex_prefetch = false; bool mergedregs = ctx->so->mergedregs; - /* our input state is the OR of all predecessor blocks' state: */ + /* Our input state is the OR of all predecessor blocks' state. + * + * Why don't we just zero the state at the beginning before merging in the + * predecessors? Because otherwise updates may not be a "lattice refinement", + * i.e. needs_ss may go from true to false for some register due to a (ss) we + * inserted the second time around (and the same for (sy)). This means that + * there's no solid guarantee the algorithm will converge, and in theory + * there may be infinite loops where we fight over the placment of an (ss). + */ for (unsigned i = 0; i < block->predecessors_count; i++) { struct ir3_block *predecessor = block->predecessors[i]; struct ir3_legalize_block_data *pbd = predecessor->data; @@ -144,6 +153,9 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) regmask_or_shared(&state->needs_ss, &state->needs_ss, &pstate->needs_ss); } + memcpy(&bd->state, state, sizeof(*state)); + state = &bd->state; + unsigned input_count = 0; foreach_instr (n, &block->instr_list) { @@ -1161,6 +1173,9 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) regmask_init(&bd->state.needs_ss_war, mergedregs); regmask_init(&bd->state.needs_ss, mergedregs); regmask_init(&bd->state.needs_sy, mergedregs); + regmask_init(&bd->begin_state.needs_ss_war, mergedregs); + regmask_init(&bd->begin_state.needs_ss, mergedregs); + regmask_init(&bd->begin_state.needs_sy, mergedregs); block->data = bd; }