freedreno/ir3: cache defining instruction

It is silly to traverse back to find first instruction that writes part
of a larger "virtual" register many times per instruction (plus per use
as a src to later instructions).  Cache this information so we only
figure it out once.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
Rob Clark 2015-06-29 14:49:08 -04:00
parent 906da49527
commit 00b6b41482
3 changed files with 91 additions and 69 deletions

View File

@ -722,15 +722,16 @@ ir3_clear_mark(struct ir3 *ir)
}
/* note: this will destroy instr->depth, don't do it until after sched! */
void
unsigned
ir3_count_instructions(struct ir3 *ir)
{
unsigned ip = 0;
unsigned cnt = 0;
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
instr->ip = ip++;
instr->ip = cnt++;
}
block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip;
block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip;
}
return cnt;
}

View File

@ -431,7 +431,7 @@ static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
void ir3_block_clear_mark(struct ir3_block *block);
void ir3_clear_mark(struct ir3 *shader);
void ir3_count_instructions(struct ir3 *ir);
unsigned ir3_count_instructions(struct ir3 *ir);
static inline int ir3_instr_regno(struct ir3_instruction *instr,
struct ir3_register *reg)

View File

@ -241,6 +241,21 @@ ir3_ra_alloc_reg_set(void *memctx)
return set;
}
/* additional block-data (per-block) */
struct ir3_ra_block_data {
BITSET_WORD *def; /* variables defined before used in block */
BITSET_WORD *use; /* variables used before defined in block */
BITSET_WORD *livein; /* which defs reach entry point of block */
BITSET_WORD *liveout; /* which defs reach exit point of block */
};
/* additional instruction-data (per-instruction) */
struct ir3_ra_instr_data {
/* cached instruction 'definer' info: */
struct ir3_instruction *defn;
int off, sz, cls;
};
/* register-assign context, per-shader */
struct ir3_ra_ctx {
struct ir3 *ir;
@ -254,14 +269,7 @@ struct ir3_ra_ctx {
unsigned class_base[total_class_count];
unsigned instr_cnt;
unsigned *def, *use; /* def/use table */
};
/* additional block-data (per-block) */
struct ir3_ra_block_data {
BITSET_WORD *def; /* variables defined before used in block */
BITSET_WORD *use; /* variables used before defined in block */
BITSET_WORD *livein; /* which defs reach entry point of block */
BITSET_WORD *liveout; /* which defs reach exit point of block */
struct ir3_ra_instr_data *instrd;
};
static bool
@ -307,12 +315,20 @@ writes_gpr(struct ir3_instruction *instr)
}
static struct ir3_instruction *
get_definer(struct ir3_instruction *instr, int *sz, int *off)
get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
int *sz, int *off)
{
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
struct ir3_instruction *d = NULL;
if (instr->fanin)
return get_definer(instr->fanin, sz, off);
return get_definer(ctx, instr->fanin, sz, off);
if (id->defn) {
*sz = id->sz;
*off = id->off;
return id->defn;
}
if (is_meta(instr) && (instr->opc == OPC_META_FI)) {
/* What about the case where collect is subset of array, we
@ -330,7 +346,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
if (!src->instr)
continue;
dd = get_definer(src->instr, &dsz, &doff);
dd = get_definer(ctx, src->instr, &dsz, &doff);
if ((!d) || (dd->ip < d->ip)) {
d = dd;
@ -339,7 +355,6 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
}
}
} else if (instr->cp.right || instr->cp.left) {
/* covers also the meta:fo case, which ends up w/ single
* scalar instructions for each component:
@ -394,7 +409,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
struct ir3_instruction *dd;
int dsz, doff;
dd = get_definer(phi, &dsz, &doff);
dd = get_definer(ctx, phi, &dsz, &doff);
*sz = MAX2(*sz, dsz);
*off = doff;
@ -428,7 +443,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
struct ir3_instruction *dd;
int dsz, doff;
dd = get_definer(d->regs[1]->instr, &dsz, &doff);
dd = get_definer(ctx, d->regs[1]->instr, &dsz, &doff);
/* by definition, should come before: */
debug_assert(dd->ip < d->ip);
@ -440,9 +455,25 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
d = dd;
}
id->defn = d;
id->sz = *sz;
id->off = *off;
return d;
}
static void
ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
if (instr->regs_count == 0)
continue;
id->defn = get_definer(ctx, instr, &id->sz, &id->off);
id->cls = size_to_class(id->sz, is_half(id->defn));
}
}
/* give each instruction a name (and ip), and count up the # of names
* of each class
*/
@ -450,8 +481,7 @@ static void
ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
struct ir3_instruction *defn;
int cls, sz, off;
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
#ifdef DEBUG
instr->name = ~0;
@ -465,9 +495,7 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
if (!writes_gpr(instr))
continue;
defn = get_definer(instr, &sz, &off);
if (defn != instr)
if (id->defn != instr)
continue;
/* arrays which don't fit in one of the pre-defined class
@ -475,9 +503,8 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
*
* TODO but we still need to allocate names for them, don't we??
*/
cls = size_to_class(sz, is_half(defn));
if (cls >= 0) {
instr->name = ctx->class_alloc_count[cls]++;
if (id->cls >= 0) {
instr->name = ctx->class_alloc_count[id->cls]++;
ctx->alloc_count++;
}
}
@ -486,8 +513,16 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
static void
ra_init(struct ir3_ra_ctx *ctx)
{
unsigned n;
ir3_clear_mark(ctx->ir);
ir3_count_instructions(ctx->ir);
n = ir3_count_instructions(ctx->ir);
ctx->instrd = rzalloc_array(NULL, struct ir3_ra_instr_data, n);
list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
ra_block_find_definers(ctx, block);
}
list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
ra_block_name_instructions(ctx, block);
@ -503,6 +538,7 @@ ra_init(struct ir3_ra_ctx *ctx)
}
ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
ralloc_steal(ctx->g, ctx->instrd);
ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
}
@ -570,39 +606,36 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
*/
if (writes_gpr(instr)) {
struct ir3_instruction *defn;
int cls, sz, off;
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
defn = get_definer(instr, &sz, &off);
if (defn == instr) {
if (id->defn == instr) {
/* arrays which don't fit in one of the pre-defined class
* sizes are pre-colored:
*/
cls = size_to_class(sz, is_half(defn));
if (cls >= 0) {
unsigned name = ra_name(ctx, cls, defn);
if (id->cls >= 0) {
unsigned name = ra_name(ctx, id->cls, id->defn);
ctx->def[name] = defn->ip;
ctx->use[name] = defn->ip;
ctx->def[name] = id->defn->ip;
ctx->use[name] = id->defn->ip;
/* since we are in SSA at this point: */
debug_assert(!BITSET_TEST(bd->use, name));
BITSET_SET(bd->def, name);
if (is_half(defn)) {
if (is_half(id->defn)) {
ra_set_node_class(ctx->g, name,
ctx->set->half_classes[cls - class_count]);
ctx->set->half_classes[id->cls - class_count]);
} else {
ra_set_node_class(ctx->g, name,
ctx->set->classes[cls]);
ctx->set->classes[id->cls]);
}
/* extend the live range for phi srcs, which may come
* from the bottom of the loop
*/
if (defn->regs[0]->flags & IR3_REG_PHI_SRC) {
struct ir3_instruction *phi = defn->regs[0]->instr;
if (id->defn->regs[0]->flags & IR3_REG_PHI_SRC) {
struct ir3_instruction *phi = id->defn->regs[0]->instr;
foreach_ssa_src(src, phi) {
/* if src is after phi, then we need to extend
* the liverange to the end of src's block:
@ -621,13 +654,10 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
foreach_ssa_src(src, instr) {
if (writes_gpr(src)) {
struct ir3_instruction *srcdefn;
int cls, sz, off;
struct ir3_ra_instr_data *id = &ctx->instrd[src->ip];
srcdefn = get_definer(src, &sz, &off);
cls = size_to_class(sz, is_half(srcdefn));
if (cls >= 0) {
unsigned name = ra_name(ctx, cls, srcdefn);
if (id->cls >= 0) {
unsigned name = ra_name(ctx, id->cls, id->defn);
ctx->use[name] = MAX2(ctx->use[name], instr->ip);
if (!BITSET_TEST(bd->def, name))
BITSET_SET(bd->use, name);
@ -719,13 +749,10 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
/* need to fix things up to keep outputs live: */
for (unsigned i = 0; i < ir->noutputs; i++) {
struct ir3_instruction *instr = ir->outputs[i];
struct ir3_instruction *defn;
int cls, sz, off;
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
defn = get_definer(instr, &sz, &off);
cls = size_to_class(sz, is_half(defn));
if (cls >= 0) {
unsigned name = ra_name(ctx, cls, defn);
if (id->cls >= 0) {
unsigned name = ra_name(ctx, id->cls, id->defn);
ctx->use[name] = ctx->instr_cnt;
}
}
@ -795,15 +822,12 @@ static void
reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
struct ir3_instruction *instr)
{
struct ir3_instruction *defn;
int cls, sz, off;
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
defn = get_definer(instr, &sz, &off);
cls = size_to_class(sz, is_half(defn));
if (cls >= 0) {
unsigned name = ra_name(ctx, cls, defn);
if (id->cls >= 0) {
unsigned name = ra_name(ctx, id->cls, id->defn);
unsigned r = ra_get_node_reg(ctx->g, name);
unsigned num = ctx->set->ra_reg_to_gpr[r] + off;
unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off;
if (reg->flags & IR3_REG_RELATIV)
num += reg->offset;
@ -811,7 +835,7 @@ reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
reg->num = num;
reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC);
if (is_half(defn))
if (is_half(id->defn))
reg->flags |= IR3_REG_HALF;
}
}
@ -866,19 +890,16 @@ ra_alloc(struct ir3_ra_ctx *ctx)
for (j = 0; i < ir->ninputs; i++) {
struct ir3_instruction *instr = ir->inputs[i];
if (instr) {
struct ir3_instruction *defn;
int cls, sz, off;
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
defn = get_definer(instr, &sz, &off);
if (defn == instr) {
if (id->defn == instr) {
unsigned name, reg;
cls = size_to_class(sz, is_half(defn));
name = ra_name(ctx, cls, defn);
reg = ctx->set->gpr_to_ra_reg[cls][j];
name = ra_name(ctx, id->cls, id->defn);
reg = ctx->set->gpr_to_ra_reg[id->cls][j];
ra_set_node_reg(ctx->g, name, reg);
j += sz;
j += id->sz;
}
}
}