pan/midgard: Integrate LCRA
Pretty routine; we do have a hack to force swizzle alignment for !32-bit until we implement !32-bit the right way.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
parent 66ad64d73d
commit e343f2ceb9
compiler.h

@@ -27,6 +27,7 @@
 #include "midgard.h"
 #include "helpers.h"
 #include "midgard_compile.h"
+#include "lcra.h"
 
 #include "util/hash_table.h"
 #include "util/u_dynarray.h"
@@ -580,10 +581,6 @@ mir_has_arg(midgard_instruction *ins, unsigned arg)
 
 void schedule_program(compiler_context *ctx);
 
-/* Register allocation */
-
-struct ra_graph;
-
 /* Broad types of register classes so we can handle special
  * registers */
 
@@ -597,8 +594,8 @@ struct ra_graph;
 #define REG_CLASS_FRAGC 5
 
 void mir_lower_special_reads(compiler_context *ctx);
-struct ra_graph* allocate_registers(compiler_context *ctx, bool *spilled);
-void install_registers(compiler_context *ctx, struct ra_graph *g);
+struct lcra_state* allocate_registers(compiler_context *ctx, bool *spilled);
+void install_registers(compiler_context *ctx, struct lcra_state *g);
 void mir_liveness_ins_update(uint16_t *live, midgard_instruction *ins, unsigned max);
 void mir_compute_liveness(compiler_context *ctx);
 void mir_invalidate_liveness(compiler_context *ctx);
midgard_ra.c

@@ -27,6 +27,7 @@
 #include "util/register_allocate.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "lcra.h"
 
 /* For work registers, we can subdivide in various ways. So we create
  * classes for the various sizes and conflict accordingly, keeping in
@@ -113,42 +114,26 @@ default_phys_reg(int reg, midgard_reg_mode size)
  * register corresponds to */
 
 static struct phys_reg
-index_to_reg(compiler_context *ctx, struct ra_graph *g, unsigned reg, midgard_reg_mode size)
+index_to_reg(compiler_context *ctx, struct lcra_state *l, unsigned reg, midgard_reg_mode size)
 {
         /* Check for special cases */
         if (reg == ~0)
                 return default_phys_reg(REGISTER_UNUSED, size);
         else if (reg >= SSA_FIXED_MINIMUM)
                 return default_phys_reg(SSA_REG_FROM_FIXED(reg), size);
-        else if (!g)
+        else if (!l)
                 return default_phys_reg(REGISTER_UNUSED, size);
 
-        /* Special cases aside, we pick the underlying register */
-        int virt = ra_get_node_reg(g, reg);
-
-        /* Divide out the register and classification */
-        int phys = virt / WORK_STRIDE;
-        int type = virt % WORK_STRIDE;
-
-        /* Apply shadow registers */
-
-        if (phys >= SHADOW_R28 && phys <= SHADOW_R29)
-                phys += 28 - SHADOW_R28;
-        else if (phys == SHADOW_R0)
-                phys = 0;
-
-        unsigned bytes = mir_bytes_for_mode(size);
-
         struct phys_reg r = {
-                .reg = phys,
-                .offset = __builtin_ctz(reg_type_to_mask[type]) * bytes,
-                .size = bytes
+                .reg = l->solutions[reg] / 16,
+                .offset = l->solutions[reg] & 0xF,
+                .size = mir_bytes_for_mode(size)
         };
 
         /* Report that we actually use this register, and return it */
 
-        if (phys < 16)
-                ctx->work_registers = MAX2(ctx->work_registers, phys);
+        if (r.reg < 16)
+                ctx->work_registers = MAX2(ctx->work_registers, r.reg);
 
         return r;
 }
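LCRA's result is just one integer per temporary: l->solutions[reg], a byte offset into a flat register file of 16-byte vec4 registers, so the register index is solution / 16 and the byte offset within it is solution & 0xF. A standalone toy mirror of that decode (toy_phys_reg is a stand-in for the driver's struct phys_reg, not the real definition):

#include <assert.h>
#include <stdio.h>

/* Toy mirror of the decode in index_to_reg: an LCRA solution is a
 * byte offset into a flat file of 16-byte vec4 registers. */
struct toy_phys_reg { unsigned reg, offset; };

static struct toy_phys_reg
decode_solution(unsigned solution)
{
        struct toy_phys_reg r = {
                .reg    = solution / 16,  /* which vec4 register */
                .offset = solution & 0xF, /* byte offset inside it */
        };
        return r;
}

int
main(void)
{
        /* Byte 40 of the register file = r2 at byte 8, i.e. the zw
         * half of a vec4 of 32-bit values. */
        struct toy_phys_reg r = decode_solution(40);
        assert(r.reg == 2 && r.offset == 8);
        printf("r%u + %u bytes\n", r.reg, r.offset);
        return 0;
}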
@@ -525,7 +510,7 @@ mir_lower_special_reads(compiler_context *ctx)
 static void
 mir_compute_segment_interference(
                 compiler_context *ctx,
-                struct ra_graph *l,
+                struct lcra_state *l,
                 midgard_bundle *bun,
                 unsigned pivot,
                 unsigned i)
@@ -546,7 +531,9 @@ mir_compute_segment_interference(
                                 continue;
                         }
 
-                        ra_add_node_interference(l, bun->instructions[q]->dest, bun->instructions[j]->src[s]);
+                        unsigned mask = mir_bytemask(bun->instructions[q]);
+                        unsigned rmask = mir_bytemask_of_read_components(bun->instructions[j], bun->instructions[j]->src[s]);
+                        lcra_add_node_interference(l, bun->instructions[q]->dest, mask, bun->instructions[j]->src[s], rmask);
                 }
         }
 }
@@ -555,7 +542,7 @@ mir_compute_segment_interference(
 static void
 mir_compute_bundle_interference(
                 compiler_context *ctx,
-                struct ra_graph *l,
+                struct lcra_state *l,
                 midgard_bundle *bun)
 {
         if (!IS_ALU(bun->tag))
@@ -580,7 +567,8 @@ mir_compute_bundle_interference(
 static void
 mir_compute_interference(
                 compiler_context *ctx,
-                struct ra_graph *g)
+                struct ra_graph *g,
+                struct lcra_state *l)
 {
         /* First, we need liveness information to be computed per block */
         mir_compute_liveness(ctx);
@@ -600,8 +588,10 @@ mir_compute_interference(
 
                         if (dest < ctx->temp_count) {
                                 for (unsigned i = 0; i < ctx->temp_count; ++i)
-                                        if (live[i])
-                                                ra_add_node_interference(g, dest, i);
+                                        if (live[i]) {
+                                                unsigned mask = mir_bytemask(ins);
+                                                lcra_add_node_interference(l, dest, mask, i, live[i]);
+                                        }
                         }
 
                         /* Update live_in */
@@ -609,7 +599,7 @@ mir_compute_interference(
         }
 
         mir_foreach_bundle_in_block(blk, bun)
-                mir_compute_bundle_interference(ctx, g, bun);
+                mir_compute_bundle_interference(ctx, l, bun);
 
         free(live);
 }
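Where the old graph RA only knew whole-node conflicts, lcra_add_node_interference takes per-byte masks (mir_bytemask for the write, mir_bytemask_of_read_components for the read), so two nodes only conflict where their live bytes actually overlap and, say, two vec2s can share one vec4 register. A toy sketch of the overlap test this enables (hypothetical helper, not the lcra.h implementation):

#include <stdbool.h>
#include <stdio.h>

/* Toy sketch: with per-byte masks, node A occupying bytes `amask`
 * (shifted to its solution) and node B occupying `bmask` only
 * interfere if the shifted masks intersect. The real lcra.c turns
 * this into linear constraints; this just shows the idea. */
static bool
bytes_conflict(unsigned asol, unsigned amask, unsigned bsol, unsigned bmask)
{
        return ((amask << asol) & (bmask << bsol)) != 0;
}

int
main(void)
{
        /* Two vec2s of 32-bit values: bytes 0-7 each (mask 0x00FF). */
        unsigned vec2 = 0x00FF;

        /* Packed at offsets 0 and 8 of the same register: no clash. */
        printf("%d\n", bytes_conflict(0, vec2, 8, vec2)); /* 0 */

        /* Both at offset 0: they overlap. */
        printf("%d\n", bytes_conflict(0, vec2, 0, vec2)); /* 1 */
        return 0;
}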
@@ -618,7 +608,7 @@ mir_compute_interference(
 /* This routine performs the actual register allocation. It should be succeeded
  * by install_registers */
 
-struct ra_graph *
+struct lcra_state *
 allocate_registers(compiler_context *ctx, bool *spilled)
 {
         /* The number of vec4 work registers available depends on when the
@@ -644,6 +634,21 @@ allocate_registers(compiler_context *ctx, bool *spilled)
          * size (vec2/vec3..). First, we'll go through and determine the
          * minimum size needed to hold values */
 
+        struct lcra_state *l = lcra_alloc_equations(ctx->temp_count, 1, 8, 16, 5);
+
+        /* Starts of classes, in bytes */
+        l->class_start[REG_CLASS_WORK] = 16 * 0;
+        l->class_start[REG_CLASS_LDST] = 16 * 26;
+        l->class_start[REG_CLASS_TEXR] = 16 * 28;
+        l->class_start[REG_CLASS_TEXW] = 16 * 28;
+
+        l->class_size[REG_CLASS_WORK] = 16 * work_count;
+        l->class_size[REG_CLASS_LDST] = 16 * 2;
+        l->class_size[REG_CLASS_TEXR] = 16 * 2;
+        l->class_size[REG_CLASS_TEXW] = 16 * 2;
+
+        lcra_set_disjoint_class(l, REG_CLASS_TEXR, REG_CLASS_TEXW);
+
         unsigned *found_class = calloc(sizeof(unsigned), ctx->temp_count);
 
         mir_foreach_instr_global(ctx, ins) {
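The class tables carve one flat byte space into the hardware ranges: 16 bytes per vec4 register, work registers from r0, load/store at r26-r27 (byte 16 * 26 = 416), and the texture read/write windows both aliasing r28-r29 (byte 448), kept apart by lcra_set_disjoint_class. A quick standalone check of that arithmetic (work_count of 16 is an assumed value; the real count depends on the shader):

#include <stdio.h>

int
main(void)
{
        /* Mirrors the class table in allocate_registers: 16 bytes
         * per vec4 register, classes expressed as byte ranges. */
        unsigned work_count = 16; /* assumption: full work set */

        struct { const char *name; unsigned start, size; } classes[] = {
                { "WORK", 16 * 0,  16 * work_count },
                { "LDST", 16 * 26, 16 * 2 },  /* r26-r27 */
                { "TEXR", 16 * 28, 16 * 2 },  /* r28-r29 */
                { "TEXW", 16 * 28, 16 * 2 },  /* same range, disjoint */
        };

        for (unsigned i = 0; i < 4; ++i) {
                unsigned first = classes[i].start / 16;
                unsigned last = (classes[i].start + classes[i].size) / 16 - 1;
                printf("%s: r%u..r%u\n", classes[i].name, first, last);
        }
        return 0;
}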
@@ -657,8 +662,17 @@ allocate_registers(compiler_context *ctx, bool *spilled)
 
                 int dest = ins->dest;
                 found_class[dest] = MAX2(found_class[dest], class);
+
+                lcra_set_alignment(l, dest, 2); /* (1 << 2) = 4 */
+
+                /* XXX: Ensure swizzles align the right way with more LCRA constraints? */
+                if (ins->type == TAG_ALU_4 && ins->alu.reg_mode != midgard_reg_mode_32)
+                        lcra_set_alignment(l, dest, 3); /* (1 << 3) = 8 */
         }
 
+        for (unsigned i = 0; i < ctx->temp_count; ++i)
+                lcra_restrict_range(l, i, (found_class[i] + 1) * 4);
+
         /* Next, we'll determine semantic class. We default to zero (work).
          * But, if we're used with a special operation, that will force us to a
          * particular class. Each node must be assigned to exactly one class; a
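lcra_set_alignment takes a log2 byte alignment: 2 pins solutions to 4-byte component boundaries, and the hack from the commit message bumps non-32-bit ALU destinations to 8-byte alignment so their swizzles stay representable until sub-32-bit modes are handled properly. A toy illustration of how the alignment filters candidate offsets within one 16-byte register:

#include <stdio.h>

/* Toy sketch: an alignment of (1 << shift) bytes simply filters
 * the byte offsets a node may take. lcra_set_alignment(l, n, 2)
 * keeps multiples of 4; the !32-bit hack uses 3, multiples of 8. */
static void
print_candidates(unsigned shift)
{
        for (unsigned sol = 0; sol < 16; ++sol)
                if ((sol & ((1u << shift) - 1)) == 0)
                        printf("%u ", sol);
        printf("\n");
}

int
main(void)
{
        print_candidates(2); /* 0 4 8 12: any 32-bit component */
        print_candidates(3); /* 0 8: halves only, the !32-bit hack */
        return 0;
}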
@@ -681,6 +695,8 @@ allocate_registers(compiler_context *ctx, bool *spilled)
                         force_vec4(found_class, ins->src[0]);
                         force_vec4(found_class, ins->src[1]);
                         force_vec4(found_class, ins->src[2]);
+
+                        lcra_restrict_range(l, ins->dest, 16);
                 }
         } else if (ins->type == TAG_TEXTURE_4) {
                 set_class(found_class, ins->dest, REG_CLASS_TEXW);
@@ -700,27 +716,21 @@ allocate_registers(compiler_context *ctx, bool *spilled)
 
         /* Mark writeout to r0 */
         mir_foreach_instr_global(ctx, ins) {
-                if (ins->compact_branch && ins->writeout)
-                        set_class(found_class, ins->src[0], REG_CLASS_FRAGC);
+                if (ins->compact_branch && ins->writeout && ins->src[0] < ctx->temp_count)
+                        l->solutions[ins->src[0]] = 0;
         }
 
         for (unsigned i = 0; i < ctx->temp_count; ++i) {
                 unsigned class = found_class[i];
-
-                ra_set_node_class(g, i, classes[class]);
+                l->class[i] = (class >> 2);
         }
 
-        mir_compute_interference(ctx, g);
+        mir_compute_interference(ctx, g, l);
 
-        if (!ra_allocate(g)) {
-                *spilled = true;
-        } else {
-                *spilled = false;
-        }
-
-        /* Whether we were successful or not, report the graph so we can
-         * compute spill nodes */
-
-        return g;
+        *spilled = !lcra_solve(l);
+        return l;
 }
 
 /* Once registers have been decided via register allocation
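Note how writeout is handled now: instead of routing through a REG_CLASS_FRAGC class, the branch source is pre-pinned by writing solution 0 (byte 0, i.e. r0) before lcra_solve runs, and the solver's boolean result replaces the ra_allocate dance. A toy mirror of the pre-pinning idea (the pinned flag is an assumption about how a solver might honor preset solutions; it is not lcra.h's actual mechanism):

#include <stdbool.h>
#include <stdio.h>

/* Toy sketch: pre-seeding a solution pins a node to a register
 * before the solver runs, the way the diff pins writeout to r0. */
struct toy_state { unsigned solutions[8]; bool pinned[8]; };

static void
pin(struct toy_state *l, unsigned node, unsigned byte)
{
        l->solutions[node] = byte; /* byte 0 == r0 */
        l->pinned[node] = true;    /* solver must not move it */
}

int
main(void)
{
        struct toy_state l = { 0 };
        pin(&l, 3, 0); /* writeout source lives in r0 */
        printf("node 3 -> r%u\n", l.solutions[3] / 16);
        return 0;
}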
@@ -730,7 +740,7 @@ allocate_registers(compiler_context *ctx, bool *spilled)
 static void
 install_registers_instr(
                 compiler_context *ctx,
-                struct ra_graph *g,
+                struct lcra_state *l,
                 midgard_instruction *ins)
 {
         switch (ins->type) {
@@ -741,9 +751,9 @@ install_registers_instr(
                 if (ins->compact_branch)
                         return;
 
-                struct phys_reg src1 = index_to_reg(ctx, g, ins->src[0], mir_srcsize(ins, 0));
-                struct phys_reg src2 = index_to_reg(ctx, g, ins->src[1], mir_srcsize(ins, 1));
-                struct phys_reg dest = index_to_reg(ctx, g, ins->dest, mir_typesize(ins));
+                struct phys_reg src1 = index_to_reg(ctx, l, ins->src[0], mir_srcsize(ins, 0));
+                struct phys_reg src2 = index_to_reg(ctx, l, ins->src[1], mir_srcsize(ins, 1));
+                struct phys_reg dest = index_to_reg(ctx, l, ins->dest, mir_typesize(ins));
 
                 mir_set_bytemask(ins, mir_bytemask(ins) << dest.offset);
 
@@ -789,13 +799,13 @@ install_registers_instr(
                 bool encodes_src = OP_IS_STORE(ins->load_store.op);
 
                 if (encodes_src) {
-                        struct phys_reg src = index_to_reg(ctx, g, ins->src[0], mir_srcsize(ins, 0));
+                        struct phys_reg src = index_to_reg(ctx, l, ins->src[0], mir_srcsize(ins, 0));
                         assert(src.reg == 26 || src.reg == 27);
 
                         ins->load_store.reg = src.reg - 26;
                         offset_swizzle(ins->swizzle[0], src.offset, src.size, 0);
                 } else {
-                        struct phys_reg dst = index_to_reg(ctx, g, ins->dest, mir_typesize(ins));
+                        struct phys_reg dst = index_to_reg(ctx, l, ins->dest, mir_typesize(ins));
 
                         ins->load_store.reg = dst.reg;
                         offset_swizzle(ins->swizzle[0], 0, 4, dst.offset);
@@ -808,14 +818,14 @@ install_registers_instr(
                 unsigned src3 = ins->src[2];
 
                 if (src2 != ~0) {
-                        struct phys_reg src = index_to_reg(ctx, g, src2, mir_srcsize(ins, 1));
+                        struct phys_reg src = index_to_reg(ctx, l, src2, mir_srcsize(ins, 1));
                         unsigned component = src.offset / src.size;
                         assert(component * src.size == src.offset);
                         ins->load_store.arg_1 |= midgard_ldst_reg(src.reg, component);
                 }
 
                 if (src3 != ~0) {
-                        struct phys_reg src = index_to_reg(ctx, g, src3, mir_srcsize(ins, 2));
+                        struct phys_reg src = index_to_reg(ctx, l, src3, mir_srcsize(ins, 2));
                         unsigned component = src.offset / src.size;
                         assert(component * src.size == src.offset);
                         ins->load_store.arg_2 |= midgard_ldst_reg(src.reg, component);
@@ -826,9 +836,9 @@ install_registers_instr(
 
         case TAG_TEXTURE_4: {
                 /* Grab RA results */
-                struct phys_reg dest = index_to_reg(ctx, g, ins->dest, mir_typesize(ins));
-                struct phys_reg coord = index_to_reg(ctx, g, ins->src[1], mir_srcsize(ins, 1));
-                struct phys_reg lod = index_to_reg(ctx, g, ins->src[2], mir_srcsize(ins, 2));
+                struct phys_reg dest = index_to_reg(ctx, l, ins->dest, mir_typesize(ins));
+                struct phys_reg coord = index_to_reg(ctx, l, ins->src[1], mir_srcsize(ins, 1));
+                struct phys_reg lod = index_to_reg(ctx, l, ins->src[2], mir_srcsize(ins, 2));
 
                 assert(dest.reg == 28 || dest.reg == 29);
                 assert(coord.reg == 28 || coord.reg == 29);
@@ -869,8 +879,8 @@ install_registers_instr(
 }
 
 void
-install_registers(compiler_context *ctx, struct ra_graph *g)
+install_registers(compiler_context *ctx, struct lcra_state *l)
 {
         mir_foreach_instr_global(ctx, ins)
-                install_registers_instr(ctx, g, ins);
+                install_registers_instr(ctx, l, ins);
 }
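A byte offset is only meaningful to the load/store pipeline as a whole swizzle component: component = offset / size, with the assert guaranteeing the offset is component-aligned. A standalone check of that math:

#include <assert.h>
#include <stdio.h>

/* Toy mirror of the component math in install_registers_instr: a
 * byte offset within a register maps to a swizzle component only
 * if it is a multiple of the component size. */
static unsigned
component_of(unsigned offset, unsigned size)
{
        unsigned component = offset / size;
        assert(component * size == offset); /* must be aligned */
        return component;
}

int
main(void)
{
        printf("%u\n", component_of(8, 4)); /* 32-bit at byte 8: lane 2 */
        printf("%u\n", component_of(4, 2)); /* 16-bit at byte 4: lane 2 */
        return 0;
}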
midgard_schedule.c

@@ -1187,7 +1187,7 @@ v_load_store_scratch(
 
 static void mir_spill_register(
                 compiler_context *ctx,
-                struct ra_graph *g,
+                struct lcra_state *l,
                 unsigned *spill_count)
 {
         unsigned spill_index = ctx->temp_count;
@@ -1197,7 +1197,7 @@ static void mir_spill_register(
          * nodes written to from an unspill */
 
         for (unsigned i = 0; i < ctx->temp_count; ++i) {
-                ra_set_node_spill_cost(g, i, 1.0);
+                lcra_set_node_spill_cost(l, i, 1);
         }
 
         /* We can't spill any bundles that contain unspills. This could be
@@ -1218,7 +1218,7 @@ static void mir_spill_register(
                                 unsigned src = bun->instructions[i]->src[s];
 
                                 if (src < ctx->temp_count)
-                                        ra_set_node_spill_cost(g, src, -1.0);
+                                        lcra_set_node_spill_cost(l, src, -1);
                         }
                 }
         }
@@ -1229,12 +1229,12 @@ static void mir_spill_register(
                 for (unsigned i = 0; i < bun->instruction_count; ++i) {
                         unsigned dest = bun->instructions[i]->dest;
                         if (dest < ctx->temp_count)
-                                ra_set_node_spill_cost(g, dest, -1.0);
+                                lcra_set_node_spill_cost(l, dest, -1);
                 }
         }
 
-        int spill_node = ra_get_best_spill_node(g);
+        int spill_node = lcra_get_best_spill_node(l);
 
         if (spill_node < 0) {
                 mir_print_shader(ctx);
@@ -1245,9 +1245,8 @@ static void mir_spill_register(
          * legitimately spill to TLS, but special registers just spill to work
          * registers */
 
-        unsigned class = ra_get_node_class(g, spill_node);
-        bool is_special = (class >> 2) != REG_CLASS_WORK;
-        bool is_special_w = (class >> 2) == REG_CLASS_TEXW;
+        bool is_special = l->class[spill_node] != REG_CLASS_WORK;
+        bool is_special_w = l->class[spill_node] == REG_CLASS_TEXW;
 
         /* Allocate TLS slot (maybe) */
         unsigned spill_slot = !is_special ? (*spill_count)++ : 0;
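Spill costs keep the old protocol: every node starts at cost 1, and anything that must not be spilled (sources and destinations around existing unspills) is marked -1 so lcra_get_best_spill_node will skip it. A toy sketch of that selection (the real heuristic presumably also weighs interference; this only shows the sign convention):

#include <stdio.h>

/* Toy sketch of best-spill-node selection: pick a node whose cost
 * is non-negative; return -1 when nothing may be spilled. */
static int
best_spill_node(const int *cost, unsigned count)
{
        for (unsigned i = 0; i < count; ++i)
                if (cost[i] >= 0)
                        return (int) i;
        return -1;
}

int
main(void)
{
        int cost[4] = { -1, -1, 1, 1 }; /* nodes 0,1 unspillable */
        printf("spill node %d\n", best_spill_node(cost, 4)); /* 2 */
        return 0;
}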
@@ -1373,7 +1372,7 @@ static void mir_spill_register(
 void
 schedule_program(compiler_context *ctx)
 {
-        struct ra_graph *g = NULL;
+        struct lcra_state *l = NULL;
         bool spilled = false;
         int iter_count = 1000; /* max iterations */
 
@@ -1398,13 +1397,13 @@ schedule_program(compiler_context *ctx)
 
         do {
                 if (spilled)
-                        mir_spill_register(ctx, g, &spill_count);
+                        mir_spill_register(ctx, l, &spill_count);
 
                 mir_squeeze_index(ctx);
                 mir_invalidate_liveness(ctx);
 
-                g = NULL;
-                g = allocate_registers(ctx, &spilled);
+                l = NULL;
+                l = allocate_registers(ctx, &spilled);
         } while(spilled && ((iter_count--) > 0));
 
         if (iter_count <= 0) {
@@ -1417,5 +1416,5 @@ schedule_program(compiler_context *ctx)
 
         ctx->tls_size = spill_count * 16;
 
-        install_registers(ctx, g);
+        install_registers(ctx, l);
 }
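Stripped to its control flow, schedule_program's loop is the classic spill-and-retry pattern, now carrying an lcra_state instead of an ra_graph. A minimal standalone sketch of just that loop (allocate here is a fake stand-in that succeeds on the third try):

#include <stdbool.h>
#include <stdio.h>

/* Toy sketch of the spill-and-retry loop in schedule_program:
 * allocate, and on failure spill one node and try again, with an
 * iteration cap as a safety net against livelock. */
static bool
allocate(int attempt)
{
        return attempt >= 2; /* pretend the third try fits */
}

int
main(void)
{
        bool spilled = false;
        int iter_count = 1000; /* max iterations, as in the diff */
        int attempt = 0;

        do {
                if (spilled)
                        printf("spill one node, retry\n");
                spilled = !allocate(attempt++);
        } while (spilled && (iter_count--) > 0);

        puts(iter_count <= 0 ? "RA failed" : "RA converged");
        return 0;
}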