mirror of https://gitlab.freedesktop.org/mesa/mesa
ir3: Rewrite shared reg handling when translating from NIR
In the future we will have many ALU instructions passing shared registers to each other, and surrounding each of them with moves to/from shared registers would severely bloat the IR coming out of NIR and create more pointless work for copy propagation. Instead, do something more like the ACO approach: allow values stored in the hash table to be shared, and move the burden of emitting a mov to non-shared to ir3_get_src(). We will then use ir3_get_src_shared() or ir3_get_src_maybe_shared() as appropriate in cases where we can handle shared registers or where we can handle both. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22075>
This commit is contained in:
parent
4828942d0c
commit
3bec9e684d
|
@ -2644,6 +2644,14 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
src, 0);
|
||||
dst[0]->dsts[0]->flags |= IR3_REG_SHARED;
|
||||
dst[0]->srcs[0]->flags |= IR3_REG_PREDICATE;
|
||||
/* Work around a bug with half-register shared -> non-shared moves by
|
||||
* adding an extra mov here so that the original destination stays full.
|
||||
*/
|
||||
if (src->dsts[0]->flags & IR3_REG_HALF) {
|
||||
dst[0] = ir3_MOV(b, dst[0], TYPE_U32);
|
||||
if (!ctx->compiler->has_scalar_alu)
|
||||
dst[0]->dsts[0]->flags &= ~IR3_REG_SHARED;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -2651,6 +2659,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
struct ir3_instruction *src = ir3_get_src(ctx, &intr->src[0])[0];
|
||||
dst[0] = ir3_READ_FIRST_MACRO(ctx->block, src, 0);
|
||||
dst[0]->dsts[0]->flags |= IR3_REG_SHARED;
|
||||
/* See above. */
|
||||
if (src->dsts[0]->flags & IR3_REG_HALF) {
|
||||
dst[0] = ir3_MOV(b, dst[0], TYPE_U32);
|
||||
if (!ctx->compiler->has_scalar_alu)
|
||||
dst[0]->dsts[0]->flags &= ~IR3_REG_SHARED;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -3586,7 +3600,12 @@ read_phi_src(struct ir3_context *ctx, struct ir3_block *blk,
|
|||
/* Create an ir3 undef */
|
||||
return NULL;
|
||||
} else {
|
||||
return ir3_get_src(ctx, &nsrc->src)[0];
|
||||
/* We need to insert the move at the end of the block */
|
||||
struct ir3_block *old_block = ctx->block;
|
||||
ctx->block = blk;
|
||||
struct ir3_instruction *src = ir3_get_src(ctx, &nsrc->src)[0];
|
||||
ctx->block = old_block;
|
||||
return src;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -227,7 +227,7 @@ ir3_get_def(struct ir3_context *ctx, nir_def *def, unsigned n)
|
|||
}
|
||||
|
||||
struct ir3_instruction *const *
|
||||
ir3_get_src(struct ir3_context *ctx, nir_src *src)
|
||||
ir3_get_src_maybe_shared(struct ir3_context *ctx, nir_src *src)
|
||||
{
|
||||
struct hash_entry *entry;
|
||||
entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
|
||||
|
@ -235,24 +235,49 @@ ir3_get_src(struct ir3_context *ctx, nir_src *src)
|
|||
return entry->data;
|
||||
}
|
||||
|
||||
static struct ir3_instruction *
|
||||
get_shared(struct ir3_block *block, struct ir3_instruction *src, bool shared)
|
||||
{
|
||||
if (!!(src->dsts[0]->flags & IR3_REG_SHARED) != shared) {
|
||||
struct ir3_instruction *mov =
|
||||
ir3_MOV(block, src, (src->dsts[0]->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32);
|
||||
mov->dsts[0]->flags &= ~IR3_REG_SHARED;
|
||||
mov->dsts[0]->flags |= COND(shared, IR3_REG_SHARED);
|
||||
return mov;
|
||||
}
|
||||
|
||||
return src;
|
||||
}
|
||||
|
||||
struct ir3_instruction *const *
|
||||
ir3_get_src_shared(struct ir3_context *ctx, nir_src *src, bool shared)
|
||||
{
|
||||
unsigned num_components = nir_src_num_components(*src);
|
||||
struct ir3_instruction *const *value = ir3_get_src_maybe_shared(ctx, src);
|
||||
bool mismatch = false;
|
||||
for (unsigned i = 0; i < nir_src_num_components(*src); i++) {
|
||||
if (!!(value[i]->dsts[0]->flags & IR3_REG_SHARED) != shared) {
|
||||
mismatch = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!mismatch)
|
||||
return value;
|
||||
|
||||
struct ir3_instruction **new_value =
|
||||
ralloc_array(ctx, struct ir3_instruction *, num_components);
|
||||
for (unsigned i = 0; i < num_components; i++)
|
||||
new_value[i] = get_shared(ctx->block, value[i], shared);
|
||||
|
||||
return new_value;
|
||||
}
|
||||
|
||||
void
|
||||
ir3_put_def(struct ir3_context *ctx, nir_def *def)
|
||||
{
|
||||
unsigned bit_size = ir3_bitsize(ctx, def->bit_size);
|
||||
|
||||
/* add extra mov if dst value is shared reg.. in some cases not all
|
||||
* instructions can read from shared regs, in cases where they can
|
||||
* ir3_cp will clean up the extra mov:
|
||||
*/
|
||||
for (unsigned i = 0; i < ctx->last_dst_n; i++) {
|
||||
if (!ctx->last_dst[i])
|
||||
continue;
|
||||
if (ctx->last_dst[i]->dsts[0]->flags & IR3_REG_SHARED) {
|
||||
ctx->last_dst[i] = ir3_MOV(ctx->block, ctx->last_dst[i], TYPE_U32);
|
||||
ctx->last_dst[i]->dsts[0]->flags &= ~IR3_REG_SHARED;
|
||||
}
|
||||
}
|
||||
|
||||
if (bit_size <= 16) {
|
||||
for (unsigned i = 0; i < ctx->last_dst_n; i++) {
|
||||
struct ir3_instruction *dst = ctx->last_dst[i];
|
||||
|
@ -627,7 +652,8 @@ ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
|
|||
dst->array.id = arr->id;
|
||||
dst->array.offset = n;
|
||||
dst->array.base = INVALID_REG;
|
||||
ir3_src_create(mov, INVALID_REG, IR3_REG_SSA | flags)->def = src->dsts[0];
|
||||
ir3_src_create(mov, INVALID_REG, IR3_REG_SSA | flags |
|
||||
(src->dsts[0]->flags & IR3_REG_SHARED))->def = src->dsts[0];
|
||||
|
||||
if (arr->last_write && arr->last_write->instr->block == block)
|
||||
ir3_reg_set_last_array(mov, dst, arr->last_write);
|
||||
|
|
|
@ -201,8 +201,17 @@ struct ir3_instruction **ir3_get_dst_ssa(struct ir3_context *ctx,
|
|||
nir_def *dst, unsigned n);
|
||||
struct ir3_instruction **ir3_get_def(struct ir3_context *ctx, nir_def *def,
|
||||
unsigned n);
|
||||
struct ir3_instruction *const *ir3_get_src(struct ir3_context *ctx,
|
||||
nir_src *src);
|
||||
struct ir3_instruction *const *ir3_get_src_maybe_shared(struct ir3_context *ctx,
|
||||
nir_src *src);
|
||||
struct ir3_instruction *const *ir3_get_src_shared(struct ir3_context *ctx,
|
||||
nir_src *src, bool shared);
|
||||
|
||||
static inline struct ir3_instruction *const *
|
||||
ir3_get_src(struct ir3_context *ctx, nir_src *src)
|
||||
{
|
||||
return ir3_get_src_shared(ctx, src, false);
|
||||
}
|
||||
|
||||
void ir3_put_def(struct ir3_context *ctx, nir_def *def);
|
||||
struct ir3_instruction *ir3_create_collect(struct ir3_block *block,
|
||||
struct ir3_instruction *const *arr,
|
||||
|
|
Loading…
Reference in New Issue