pan/bi: Interpret register allocation results

Once LCRA has run, we have a map from IR indices to byte offsets into
the register file. We need to "install" these results: rewrite the IR
to use native registers, and fix up writemasks/swizzles so that vector
component selection is replaced by addressing adjacent registers (LCRA
itself models real vectors).

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4158>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4158>
This commit is contained in:
Alyssa Rosenzweig 2020-03-12 08:05:58 -04:00 committed by Marge Bot
parent e8139ef645
commit dcc50f4302
2 changed files with 96 additions and 0 deletions

View File

@ -89,6 +89,99 @@ bi_allocate_registers(bi_context *ctx, bool *success)
return l;
}
/* Maps an IR index to a native register reference using the LCRA solution.
 *
 * `offset` is added to the solved position before it is converted to a
 * register number; the sum must be a multiple of 4. Indices outside the
 * range LCRA knows about, or whose solution is negative, are returned
 * unchanged. */

static unsigned
bi_reg_from_index(struct lcra_state *l, unsigned index, unsigned offset)
{
        /* Nothing was allocated for indices beyond the node count */
        if (!(index < l->node_count))
                return index;

        /* A negative solution means LCRA left this index unsolved */
        signed position = l->solutions[index];

        if (position < 0)
                return index;

        position += offset;

        /* The final position has to land on a 4-byte register boundary */
        assert((position & 0x3) == 0);

        unsigned reg = position / 4;

        return BIR_INDEX_REGISTER | reg;
}
/* Rewrites source `src` of `ins` to reference a native register.
 *
 * For non-vector instructions the swizzle is used as a component select:
 * the register holding the selected components becomes a byte offset passed
 * to bi_reg_from_index(), and each swizzle entry is reduced to a component
 * index within that single register. Sources whose index is not below
 * l->node_count are left untouched. */

static void
bi_adjust_src_ra(bi_instruction *ins, struct lcra_state *l, unsigned src)
{
        if (ins->src[src] >= l->node_count)
                return;

        bool vector = (bi_class_props[ins->type] & BI_VECTOR);
        unsigned offset = 0;

        if (vector) {
                /* TODO: Do we do anything here? */
        } else {
                /* Use the swizzle as component select */
                nir_alu_type T = ins->src_types[src];
                unsigned size = nir_alu_type_get_type_size(T);

                /* Sub-byte sizes are rounded up to a whole byte */
                unsigned bytes = (MAX2(size, 8) / 8);

                /* Components wider than a 32-bit register (64-bit types)
                 * would make 4 / bytes zero and the divisions below would
                 * trap, so never go below one component per register */
                unsigned comps_per_reg = MAX2(4 / bytes, 1);
                unsigned components = bi_get_component_count(ins);

                for (unsigned i = 0; i < components; ++i) {
                        /* Byte position of the selected component, rounded
                         * down to the start of its 32-bit register. For
                         * 8/16/32-bit types this equals
                         * (swizzle / comps_per_reg) * 4.
                         * NOTE(review): 64-bit sources previously divided
                         * by zero; the offset used here assumes components
                         * are packed contiguously -- confirm once 64-bit is
                         * actually supported. */
                        unsigned off = (ins->swizzle[src][i] * bytes) & ~0x3u;

                        /* We can't cross register boundaries in a swizzle */
                        if (i == 0)
                                offset = off;
                        else
                                assert(off == offset);

                        ins->swizzle[src][i] %= comps_per_reg;
                }
        }

        ins->src[src] = bi_reg_from_index(l, ins->src[src], offset);
}
/* Rewrites the destination of `ins` to reference a native register.
 *
 * For non-vector instructions, the position of the lowest set writemask
 * bit selects which register of the allocation is written; the writemask
 * is then shifted down so it is relative to that register. Destinations
 * whose index is not below l->node_count are left untouched. */

static void
bi_adjust_dest_ra(bi_instruction *ins, struct lcra_state *l)
{
        if (ins->dest >= l->node_count)
                return;

        bool vector = (bi_class_props[ins->type] & BI_VECTOR);
        unsigned offset = 0;

        /* __builtin_ctz is undefined for a zero argument, so an empty
         * writemask simply keeps the zero offset */
        if (!vector && ins->writemask != 0) {
                /* Look at the writemask to get an offset, specifically the
                 * trailing zeros */
                unsigned tz = __builtin_ctz(ins->writemask);

                /* Recall writemask is one bit per byte, so tz is in bytes */
                unsigned regs = tz / 4;
                offset = regs * 4;

                /* Adjust writemask to compensate */
                ins->writemask >>= offset;
        }

        ins->dest = bi_reg_from_index(l, ins->dest, offset);
}
/* Applies the register allocation results in `l` to every instruction in
 * the shader: the destination is rewritten first, then each source. */

static void
bi_install_registers(bi_context *ctx, struct lcra_state *l)
{
        bi_foreach_instr_global(ctx, instr) {
                bi_adjust_dest_ra(instr, l);

                bi_foreach_src(instr, src_idx) {
                        bi_adjust_src_ra(instr, l, src_idx);
                }
        }
}
void
bi_register_allocate(bi_context *ctx)
{
@ -108,5 +201,7 @@ bi_register_allocate(bi_context *ctx)
assert(success);
} while(!success);
bi_install_registers(ctx, l);
lcra_free(l);
}

View File

@ -864,6 +864,7 @@ bifrost_compile_shader_nir(nir_shader *nir, panfrost_program *program, unsigned
bi_print_shader(ctx, stdout);
bi_schedule(ctx);
bi_register_allocate(ctx);
bi_print_shader(ctx, stdout);
ralloc_free(ctx);
}