ir3: Fix convergence behavior for loops with continues

When loops have continue statements, it's expected that when we execute
a divergent continue (i.e. a continue where not all of the threads
active at the start take it) we keep going with the rest of the loop
body and then reconverge at the start of the next iteration. However the
Adreno ISA seems to always take a branch that jumps backwards, assuming
it's the bottom of a loop, so we get a different, undesired convergence
behavior. There's no way I know of to control this behavior in the
instruction set, so we have to instead insert a "continue block" at the
end of the loop where continue statements reconverge which then jumps
back to the top of the loop. Since this doesn't correspond 1:1 with any
NIR block we have to make control flow handling in NIR->ir3 a bit more
complicated, unfortunately.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6752>
This commit is contained in:
Connor Abbott 2021-06-28 12:56:15 +02:00 committed by Marge Bot
parent b1b80c06a7
commit 0fa93fb662
3 changed files with 96 additions and 12 deletions

View File

@ -2799,6 +2799,43 @@ emit_phi(struct ir3_context *ctx, nir_phi_instr *nphi)
static struct ir3_block *get_block(struct ir3_context *ctx, const nir_block *nblock);
static struct ir3_instruction *read_phi_src(struct ir3_context *ctx,
struct ir3_block *blk,
struct ir3_instruction *phi,
nir_phi_instr *nphi)
{
if (!blk->nblock) {
struct ir3_instruction *continue_phi =
ir3_instr_create(blk, OPC_META_PHI, 1, blk->predecessors_count);
__ssa_dst(continue_phi)->flags = phi->dsts[0]->flags;
for (unsigned i = 0; i < blk->predecessors_count; i++) {
struct ir3_instruction *src =
read_phi_src(ctx, blk->predecessors[i], phi, nphi);
if (src)
__ssa_src(continue_phi, src, 0);
else
ir3_src_create(continue_phi, INVALID_REG, phi->dsts[0]->flags);
}
return continue_phi;
}
nir_foreach_phi_src(nsrc, nphi) {
if (blk->nblock == nsrc->pred) {
if (nsrc->src.ssa->parent_instr->type == nir_instr_type_ssa_undef) {
/* Create an ir3 undef */
return NULL;
} else {
return ir3_get_src(ctx, &nsrc->src)[0];
}
}
}
unreachable("couldn't find phi node ir3 block");
return NULL;
}
static void
resolve_phis(struct ir3_context *ctx, struct ir3_block *block)
{
@ -2808,19 +2845,17 @@ resolve_phis(struct ir3_context *ctx, struct ir3_block *block)
nir_phi_instr *nphi = phi->phi.nphi;
if (!nphi) /* skip continue phis created above */
continue;
for (unsigned i = 0; i < block->predecessors_count; i++) {
struct ir3_block *pred = block->predecessors[i];
nir_foreach_phi_src(nsrc, nphi) {
if (get_block(ctx, nsrc->pred) == pred) {
if (nsrc->src.ssa->parent_instr->type == nir_instr_type_ssa_undef) {
/* Create an ir3 undef */
ir3_src_create(phi, INVALID_REG, phi->dsts[0]->flags);
} else {
struct ir3_instruction *src = ir3_get_src(ctx, &nsrc->src)[0];
__ssa_src(phi, src, 0);
}
break;
}
struct ir3_instruction *src = read_phi_src(ctx, pred, phi, nphi);
if (src) {
__ssa_src(phi, src, 0);
} else {
/* Create an ir3 undef */
ir3_src_create(phi, INVALID_REG, phi->dsts[0]->flags);
}
}
}
@ -2913,6 +2948,27 @@ get_block(struct ir3_context *ctx, const nir_block *nblock)
return block;
}
static struct ir3_block *
get_block_or_continue(struct ir3_context *ctx, const nir_block *nblock)
{
struct hash_entry *hentry;
hentry = _mesa_hash_table_search(ctx->continue_block_ht, nblock);
if (hentry)
return hentry->data;
return get_block(ctx, nblock);
}
static struct ir3_block *
create_continue_block(struct ir3_context *ctx, const nir_block *nblock)
{
struct ir3_block *block = ir3_block_create(ctx->ir);
block->nblock = NULL;
_mesa_hash_table_insert(ctx->continue_block_ht, nblock, block);
return block;
}
static void
emit_block(struct ir3_context *ctx, nir_block *nblock)
{
@ -2942,7 +2998,7 @@ emit_block(struct ir3_context *ctx, nir_block *nblock)
for (int i = 0; i < ARRAY_SIZE(ctx->block->successors); i++) {
if (nblock->successors[i]) {
ctx->block->successors[i] =
get_block(ctx, nblock->successors[i]);
get_block_or_continue(ctx, nblock->successors[i]);
ctx->block->physical_successors[i] = ctx->block->successors[i];
}
}
@ -2990,7 +3046,28 @@ emit_loop(struct ir3_context *ctx, nir_loop *nloop)
{
unsigned old_loop_id = ctx->loop_id;
ctx->loop_id = ctx->so->loops + 1;
struct nir_block *nstart = nir_loop_first_block(nloop);
struct ir3_block *continue_blk = NULL;
/* There's always one incoming edge from outside the loop, and if there
* are more than two backedges from inside the loop (so more than 2 total
* edges) then we need to create a continue block after the loop to ensure
* that control reconverges at the end of each loop iteration.
*/
if (nstart->predecessors->entries > 2) {
continue_blk = create_continue_block(ctx, nstart);
}
emit_cf_list(ctx, &nloop->body);
if (continue_blk) {
struct ir3_block *start = get_block(ctx, nstart);
continue_blk->successors[0] = start;
continue_blk->physical_successors[0] = start;
list_addtail(&continue_blk->node, &ctx->ir->block_list);
}
ctx->so->loops++;
ctx->loop_id = old_loop_id;
}

View File

@ -63,6 +63,8 @@ ir3_context_init(struct ir3_compiler *compiler,
_mesa_hash_pointer, _mesa_key_pointer_equal);
ctx->block_ht = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
ctx->continue_block_ht = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
ctx->sel_cond_conversions = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);

View File

@ -141,6 +141,11 @@ struct ir3_context {
*/
struct hash_table *block_ht;
/* maps nir_block at the top of a loop to ir3_block collecting continue
* edges.
*/
struct hash_table *continue_block_ht;
/* on a4xx, bitmask of samplers which need astc+srgb workaround: */
unsigned astc_srgb;