ir3: Rewrite shared reg handling when translating from NIR

In the future we will have many ALU instructions passing shared
registers to each other, and surrounding them each with moves to/from
shared registers will severely bloat the IR size coming out of NIR and
make more pointless work for copy propagation. Instead, do something
more like the ACO approach and allow values stored in the hash table to
be shared, and move the burden of emitting a mov to non-shared to
ir3_get_src(). We will then use ir3_get_src_shared() in cases where we can
handle shared registers, and ir3_get_src_maybe_shared() in cases where we
can handle both shared and non-shared registers.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22075>
This commit is contained in:
Connor Abbott 2023-03-02 15:09:39 +01:00 committed by Marge Bot
parent 4828942d0c
commit 3bec9e684d
3 changed files with 72 additions and 18 deletions

View File

@ -2644,6 +2644,14 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
src, 0);
dst[0]->dsts[0]->flags |= IR3_REG_SHARED;
dst[0]->srcs[0]->flags |= IR3_REG_PREDICATE;
/* Work around a bug with half-register shared -> non-shared moves by
* adding an extra mov here so that the original destination stays full.
*/
if (src->dsts[0]->flags & IR3_REG_HALF) {
dst[0] = ir3_MOV(b, dst[0], TYPE_U32);
if (!ctx->compiler->has_scalar_alu)
dst[0]->dsts[0]->flags &= ~IR3_REG_SHARED;
}
break;
}
@ -2651,6 +2659,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction *src = ir3_get_src(ctx, &intr->src[0])[0];
dst[0] = ir3_READ_FIRST_MACRO(ctx->block, src, 0);
dst[0]->dsts[0]->flags |= IR3_REG_SHARED;
/* See above. */
if (src->dsts[0]->flags & IR3_REG_HALF) {
dst[0] = ir3_MOV(b, dst[0], TYPE_U32);
if (!ctx->compiler->has_scalar_alu)
dst[0]->dsts[0]->flags &= ~IR3_REG_SHARED;
}
break;
}
@ -3586,7 +3600,12 @@ read_phi_src(struct ir3_context *ctx, struct ir3_block *blk,
/* Create an ir3 undef */
return NULL;
} else {
return ir3_get_src(ctx, &nsrc->src)[0];
/* We need to insert the move at the end of the block */
struct ir3_block *old_block = ctx->block;
ctx->block = blk;
struct ir3_instruction *src = ir3_get_src(ctx, &nsrc->src)[0];
ctx->block = old_block;
return src;
}
}
}

View File

@ -227,7 +227,7 @@ ir3_get_def(struct ir3_context *ctx, nir_def *def, unsigned n)
}
struct ir3_instruction *const *
ir3_get_src(struct ir3_context *ctx, nir_src *src)
ir3_get_src_maybe_shared(struct ir3_context *ctx, nir_src *src)
{
struct hash_entry *entry;
entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
@ -235,24 +235,49 @@ ir3_get_src(struct ir3_context *ctx, nir_src *src)
return entry->data;
}
/*
 * Return an instruction whose destination is shared exactly when `shared`
 * is set.
 *
 * If src's first destination already has the requested IR3_REG_SHARED state
 * it is returned unchanged. Otherwise a mov of src is emitted into `block`
 * (16-bit type if the destination is flagged IR3_REG_HALF, 32-bit otherwise)
 * and the mov's destination gets the requested shared state.
 */
static struct ir3_instruction *
get_shared(struct ir3_block *block, struct ir3_instruction *src, bool shared)
{
   const bool is_shared = (src->dsts[0]->flags & IR3_REG_SHARED) != 0;

   /* Already in the requested state: no move needed. */
   if (is_shared == shared)
      return src;

   /* Keep the width of the copy in sync with the source's destination. */
   struct ir3_instruction *mov;
   if (src->dsts[0]->flags & IR3_REG_HALF)
      mov = ir3_MOV(block, src, TYPE_U16);
   else
      mov = ir3_MOV(block, src, TYPE_U32);

   /* Force the shared bit on the new destination to match the request. */
   if (shared)
      mov->dsts[0]->flags |= IR3_REG_SHARED;
   else
      mov->dsts[0]->flags &= ~IR3_REG_SHARED;

   return mov;
}
/*
 * Fetch the ir3 values for a NIR source, with every component's destination
 * shared (shared == true) or non-shared (shared == false).
 *
 * When all components already match the request, the array returned by
 * ir3_get_src_maybe_shared() is passed through untouched. Otherwise a new
 * array is ralloc'd against ctx and filled via get_shared(), which emits a
 * mov into ctx->block for each component that does not match and reuses the
 * existing instruction for each one that does. The original array is never
 * modified.
 */
struct ir3_instruction *const *
ir3_get_src_shared(struct ir3_context *ctx, nir_src *src, bool shared)
{
   const unsigned num_components = nir_src_num_components(*src);
   struct ir3_instruction *const *value = ir3_get_src_maybe_shared(ctx, src);

   /* Fast path: only build a new array if at least one component has the
    * wrong shared state. Use the cached component count rather than
    * re-querying the source on every iteration.
    */
   bool mismatch = false;
   for (unsigned i = 0; i < num_components; i++) {
      if (!!(value[i]->dsts[0]->flags & IR3_REG_SHARED) != shared) {
         mismatch = true;
         break;
      }
   }

   if (!mismatch)
      return value;

   struct ir3_instruction **new_value =
      ralloc_array(ctx, struct ir3_instruction *, num_components);
   for (unsigned i = 0; i < num_components; i++)
      new_value[i] = get_shared(ctx->block, value[i], shared);

   return new_value;
}
void
ir3_put_def(struct ir3_context *ctx, nir_def *def)
{
unsigned bit_size = ir3_bitsize(ctx, def->bit_size);
/* add extra mov if dst value is shared reg.. in some cases not all
* instructions can read from shared regs, in cases where they can
* ir3_cp will clean up the extra mov:
*/
for (unsigned i = 0; i < ctx->last_dst_n; i++) {
if (!ctx->last_dst[i])
continue;
if (ctx->last_dst[i]->dsts[0]->flags & IR3_REG_SHARED) {
ctx->last_dst[i] = ir3_MOV(ctx->block, ctx->last_dst[i], TYPE_U32);
ctx->last_dst[i]->dsts[0]->flags &= ~IR3_REG_SHARED;
}
}
if (bit_size <= 16) {
for (unsigned i = 0; i < ctx->last_dst_n; i++) {
struct ir3_instruction *dst = ctx->last_dst[i];
@ -627,7 +652,8 @@ ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
dst->array.id = arr->id;
dst->array.offset = n;
dst->array.base = INVALID_REG;
ir3_src_create(mov, INVALID_REG, IR3_REG_SSA | flags)->def = src->dsts[0];
ir3_src_create(mov, INVALID_REG, IR3_REG_SSA | flags |
(src->dsts[0]->flags & IR3_REG_SHARED))->def = src->dsts[0];
if (arr->last_write && arr->last_write->instr->block == block)
ir3_reg_set_last_array(mov, dst, arr->last_write);

View File

@ -201,8 +201,17 @@ struct ir3_instruction **ir3_get_dst_ssa(struct ir3_context *ctx,
nir_def *dst, unsigned n);
struct ir3_instruction **ir3_get_def(struct ir3_context *ctx, nir_def *def,
unsigned n);
struct ir3_instruction *const *ir3_get_src(struct ir3_context *ctx,
nir_src *src);
struct ir3_instruction *const *ir3_get_src_maybe_shared(struct ir3_context *ctx,
nir_src *src);
struct ir3_instruction *const *ir3_get_src_shared(struct ir3_context *ctx,
nir_src *src, bool shared);
/* Convenience wrapper around ir3_get_src_shared() that always requests
 * non-shared values (shared == false) for the given NIR source.
 */
static inline struct ir3_instruction *const *
ir3_get_src(struct ir3_context *ctx, nir_src *src)
{
return ir3_get_src_shared(ctx, src, false);
}
void ir3_put_def(struct ir3_context *ctx, nir_def *def);
struct ir3_instruction *ir3_create_collect(struct ir3_block *block,
struct ir3_instruction *const *arr,