/*
 * Copyright (C) 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "util/rb_tree.h"
#include "ir3_ra.h"
#include "ir3_shader.h"

/*
 * This pass does two things:
 *
 * 1. Calculates the maximum register pressure. To do this, we need to use the
 *    exact same technique that RA uses for combining meta_split instructions
 *    with their sources, so that our calculation agrees with RA.
 * 2. Spills when the register pressure exceeds a limit calculated by RA.
 *    The implementation is based on "Register Spilling and Live-Range Splitting
 *    for SSA-Form Programs" by Braun and Hack, although again care has to be
 *    taken to handle combining split/collect instructions.
 */
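
/* A note on the spill heuristic (our summary of how the code below applies
 * the paper, not text from the paper itself): whenever making an
 * instruction's sources or defs resident would push register pressure past
 * the limit, we evict the live value whose next use is furthest away, since
 * keeping it in a register pays off the least. The rb-trees of live
 * intervals exist so that finding a victim is an ordered tree walk (see
 * spill_interval_cmp() and limit()) instead of a scan over all live values,
 * with rematerializable values preferred because they are cheapest to
 * restore.
 */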

struct reg_or_immed {
   unsigned flags;
   union {
      struct ir3_register *def;
      uint32_t uimm;
      unsigned const_num;
   };
};
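
/* reg_or_immed above is a tagged union: flags follows the ir3_register
 * convention, so IR3_REG_IMMED selects uimm, IR3_REG_CONST selects const_num,
 * and otherwise def points at an SSA definition. set_src_val() below decodes
 * it exactly this way.
 */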

struct ra_spill_interval {
   struct ir3_reg_interval interval;

   struct rb_node node;
   struct rb_node half_node;

   /* The current SSA value/const/immed this source is mapped to. */
   struct reg_or_immed dst;

   /* When computing use distances we use the distance relative to the start
    * of the block. So, for example, a value that's defined in cycle 5 of the
    * block and used 6 cycles later will always have a next_use_distance of 11
    * until we reach that use.
    */
   unsigned next_use_distance;

   /* Whether this value was reloaded and therefore doesn't need to be
    * spilled again. Corresponds to the S set in the paper.
    */
   bool already_spilled;

   /* We need to add sources early for accounting purposes, but we have to
    * insert the reload code for them last. Keep track of whether this interval
    * needs to be reloaded later.
    */
   bool needs_reload;

   /* Keep track of whether this interval currently can't be spilled because:
    * - It or one of its children is a source and we're making space for
    *   sources.
    * - It is a destination and we're making space for destinations.
    */
   bool cant_spill;

   /* Whether this interval can be rematerialized. */
   bool can_rematerialize;
};

struct ra_spill_block_state {
   unsigned *next_use_end;
   unsigned *next_use_start;

   unsigned cycles;

   /* Map from SSA def to reg_or_immed it is mapped to at the end of the block.
    * This map only contains values which we didn't spill, so it also serves as
    * a record of the new live-out set for this block.
    */
   struct hash_table *remap;

   /* For blocks whose successors are visited first (i.e. loop backedges), which
    * values should be live at the end.
    */
   BITSET_WORD *live_out;

   bool visited;
};

struct ra_spill_ctx {
   struct ir3_reg_ctx reg_ctx;

   struct ra_spill_interval **intervals;
   unsigned intervals_count;

   /* rb tree of live intervals that we can spill, ordered by next-use distance.
    * full_live_intervals contains the full+shared intervals in the merged_regs
    * case. We use this list to determine what to spill.
    */
   struct rb_tree full_live_intervals;
   struct rb_tree half_live_intervals;

   struct ir3_pressure cur_pressure, max_pressure;

   struct ir3_pressure limit_pressure;

   /* When spilling, we need to reserve a register to serve as the zero'd
    * "base". For simplicity we reserve a register at the beginning so that it's
    * always available.
    */
   struct ir3_register *base_reg;

   /* Current pvtmem offset in bytes. */
   unsigned spill_slot;

   struct ir3_liveness *live;

   const struct ir3_compiler *compiler;

   struct ra_spill_block_state *blocks;

   bool spilling;

   bool merged_regs;
};

static void
add_base_reg(struct ra_spill_ctx *ctx, struct ir3 *ir)
{
   struct ir3_block *start = ir3_start_block(ir);

   /* We need to stick it after any meta instructions which need to be first. */
   struct ir3_instruction *after = NULL;
   foreach_instr (instr, &start->instr_list) {
      if (instr->opc != OPC_META_INPUT &&
          instr->opc != OPC_META_TEX_PREFETCH) {
         after = instr;
         break;
      }
   }

   struct ir3_instruction *mov = create_immed(start, 0);

   if (after)
      ir3_instr_move_before(mov, after);

   ctx->base_reg = mov->dsts[0];

   /* We don't create an interval, etc. for the base reg, so just lower the
    * register pressure limit to account for it. We assume it's always
    * available for simplicity.
    */
   ctx->limit_pressure.full -= reg_size(ctx->base_reg);
}
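
/* The base register created above is consumed by every OPC_SPILL_MACRO and
 * OPC_RELOAD_MACRO emitted below, which take it as the zero'd address base
 * for their pvtmem accesses; that's why each spill/reload source list starts
 * with ctx->base_reg.
 */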

/* Compute the number of cycles per instruction used for next-use-distance
 * analysis. This is just approximate, obviously.
 */
static unsigned
instr_cycles(struct ir3_instruction *instr)
{
   if (instr->opc == OPC_META_PARALLEL_COPY) {
      unsigned cycles = 0;
      for (unsigned i = 0; i < instr->dsts_count; i++) {
         if (!instr->srcs[i]->def ||
             instr->srcs[i]->def->merge_set != instr->dsts[i]->merge_set) {
            cycles += reg_elems(instr->srcs[i]);
         }
      }

      return cycles;
   }

   if (instr->opc == OPC_META_COLLECT) {
      unsigned cycles = 0;
      for (unsigned i = 0; i < instr->srcs_count; i++) {
         if (!instr->srcs[i]->def ||
             instr->srcs[i]->def->merge_set != instr->dsts[0]->merge_set) {
            cycles++;
         }
      }

      return cycles;
   }

   if (is_meta(instr))
      return 0;

   return 1 + instr->repeat;
}

static bool
compute_block_next_distance(struct ra_spill_ctx *ctx, struct ir3_block *block,
                            unsigned *tmp_next_use)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   memcpy(tmp_next_use, state->next_use_end,
          ctx->live->definitions_count * sizeof(*tmp_next_use));

   unsigned cycle = state->cycles;
   foreach_instr_rev (instr, &block->instr_list) {
      ra_foreach_dst (dst, instr) {
         dst->next_use = tmp_next_use[dst->name];
      }

      ra_foreach_src (src, instr) {
         src->next_use = tmp_next_use[src->def->name];
      }

      cycle -= instr_cycles(instr);

      if (instr->opc == OPC_META_PARALLEL_COPY) {
         ra_foreach_src_n (src, i, instr) {
            if (src->def->merge_set == instr->dsts[i]->merge_set &&
                src->def->merge_set_offset ==
                   instr->dsts[i]->merge_set_offset) {
               tmp_next_use[src->def->name] =
                  tmp_next_use[instr->dsts[i]->name];
            } else {
               tmp_next_use[src->def->name] = cycle;
            }
         }
      } else if (instr->opc != OPC_META_PHI) {
         ra_foreach_src (src, instr) {
            tmp_next_use[src->def->name] = cycle;
         }
      }

      ra_foreach_dst (dst, instr) {
         tmp_next_use[dst->name] = UINT_MAX;
      }
   }

   memcpy(state->next_use_start, tmp_next_use,
          ctx->live->definitions_count * sizeof(*tmp_next_use));

   bool progress = false;
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      const struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *pred_state = &ctx->blocks[pred->index];

      /* Add a large-enough distance in front of edges exiting the loop so that
       * variables that are live-through the loop but not used inside it are
       * prioritized for spilling, as per the paper. This just needs to be
       * larger than the longest path through the loop.
       */
      bool loop_exit = pred->loop_depth < block->loop_depth;
      unsigned block_distance = pred_state->cycles + (loop_exit ? 100000 : 0);

      for (unsigned j = 0; j < ctx->live->definitions_count; j++) {
         if (state->next_use_start[j] < UINT_MAX &&
             state->next_use_start[j] + block_distance <
                pred_state->next_use_end[j]) {
            pred_state->next_use_end[j] =
               state->next_use_start[j] + block_distance;
            progress = true;
         }
      }

      foreach_instr (phi, &block->instr_list) {
         if (phi->opc != OPC_META_PHI)
            break;
         if (!phi->srcs[i]->def)
            continue;
         unsigned src = phi->srcs[i]->def->name;
         if (phi->dsts[0]->next_use < UINT_MAX &&
             phi->dsts[0]->next_use + block_distance <
                pred_state->next_use_end[src]) {
            pred_state->next_use_end[src] =
               phi->dsts[0]->next_use + block_distance;
            progress = true;
         }
      }
   }

   return progress;
}
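
/* An illustrative example of the distances computed above (made-up numbers):
 * in a block whose instructions each cost one cycle,
 *
 *    cycle 0: x = ...
 *    cycle 1: ...        (x unused)
 *    cycle 2: ... = x
 *
 * the backward walk records next_use = 2 for x at its definition, i.e. the
 * distance is measured from the start of the block rather than from the def,
 * matching the next_use_distance comment in ra_spill_interval. The
 * predecessor propagation at the end of this walk is iterated to a fixpoint
 * by compute_next_distance() because loop backedges can keep lowering
 * next_use_end values.
 */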

static void
compute_next_distance(struct ra_spill_ctx *ctx, struct ir3 *ir)
{
   for (unsigned i = 0; i < ctx->live->block_count; i++) {
      ctx->blocks[i].next_use_start =
         ralloc_array(ctx, unsigned, ctx->live->definitions_count);
      ctx->blocks[i].next_use_end =
         ralloc_array(ctx, unsigned, ctx->live->definitions_count);

      for (unsigned j = 0; j < ctx->live->definitions_count; j++) {
         ctx->blocks[i].next_use_start[j] = UINT_MAX;
         ctx->blocks[i].next_use_end[j] = UINT_MAX;
      }
   }

   foreach_block (block, &ir->block_list) {
      struct ra_spill_block_state *state = &ctx->blocks[block->index];
      state->cycles = 0;
      foreach_instr (instr, &block->instr_list) {
         state->cycles += instr_cycles(instr);
         foreach_dst (dst, instr) {
            dst->spill_slot = ~0;
         }
      }
   }

   unsigned *tmp_next_use =
      ralloc_array(ctx, unsigned, ctx->live->definitions_count);

   bool progress = true;
   while (progress) {
      progress = false;
      foreach_block_rev (block, &ir->block_list) {
         progress |= compute_block_next_distance(ctx, block, tmp_next_use);
      }
   }
}

static bool
can_rematerialize(struct ir3_register *reg)
{
   if (reg->flags & IR3_REG_ARRAY)
      return false;
   if (reg->instr->opc != OPC_MOV)
      return false;
   if (!(reg->instr->srcs[0]->flags & (IR3_REG_IMMED | IR3_REG_CONST)))
      return false;
   if (reg->instr->srcs[0]->flags & IR3_REG_RELATIV)
      return false;
   return true;
}

static struct ir3_register *
rematerialize(struct ir3_register *reg, struct ir3_instruction *after,
              struct ir3_block *block)
{
   d("rematerializing ssa_%u:%u", reg->instr->serialno, reg->name);

   struct ir3_instruction *remat =
      ir3_instr_create(block, reg->instr->opc, 1, reg->instr->srcs_count);
   struct ir3_register *dst = __ssa_dst(remat);
   dst->flags |= reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
   for (unsigned i = 0; i < reg->instr->srcs_count; i++) {
      struct ir3_register *src =
         ir3_src_create(remat, INVALID_REG, reg->instr->srcs[i]->flags);
      *src = *reg->instr->srcs[i];
   }

   remat->cat1 = reg->instr->cat1;

   dst->merge_set = reg->merge_set;
   dst->merge_set_offset = reg->merge_set_offset;
   dst->interval_start = reg->interval_start;
   dst->interval_end = reg->interval_end;

   if (after)
      ir3_instr_move_before(remat, after);

   return dst;
}
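
/* In other words, only moves of an immediate or a (non-relative) constant are
 * rematerialized: re-emitting the mov at the reload point is cheaper than a
 * round trip through pvtmem, and spill_interval() below skips the store
 * entirely for such values unless they belong to a merge set.
 */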

static void
ra_spill_interval_init(struct ra_spill_interval *interval,
                       struct ir3_register *reg)
{
   ir3_reg_interval_init(&interval->interval, reg);
   interval->dst.flags = reg->flags;
   interval->dst.def = reg;
   interval->already_spilled = false;
   interval->needs_reload = false;
   interval->cant_spill = false;
   interval->can_rematerialize = can_rematerialize(reg);
}

static struct ra_spill_interval *
ir3_reg_interval_to_interval(struct ir3_reg_interval *interval)
{
   return rb_node_data(struct ra_spill_interval, interval, interval);
}

static struct ra_spill_interval *
ra_spill_interval_root(struct ra_spill_interval *interval)
{
   struct ir3_reg_interval *ir3_interval = &interval->interval;
   while (ir3_interval->parent)
      ir3_interval = ir3_interval->parent;
   return ir3_reg_interval_to_interval(ir3_interval);
}

static struct ra_spill_ctx *
ir3_reg_ctx_to_ctx(struct ir3_reg_ctx *ctx)
{
   return rb_node_data(struct ra_spill_ctx, ctx, reg_ctx);
}

static int
spill_interval_cmp(const struct ra_spill_interval *a,
                   const struct ra_spill_interval *b)
{
   /* Prioritize intervals that we can rematerialize. */
   if (a->can_rematerialize && !b->can_rematerialize)
      return 1;
   if (!a->can_rematerialize && b->can_rematerialize)
      return -1;

   return a->next_use_distance - b->next_use_distance;
}

static int
ra_spill_interval_cmp(const struct rb_node *_a, const struct rb_node *_b)
{
   const struct ra_spill_interval *a =
      rb_node_data(const struct ra_spill_interval, _a, node);
   const struct ra_spill_interval *b =
      rb_node_data(const struct ra_spill_interval, _b, node);
   return spill_interval_cmp(a, b);
}

static int
ra_spill_interval_half_cmp(const struct rb_node *_a, const struct rb_node *_b)
{
   const struct ra_spill_interval *a =
      rb_node_data(const struct ra_spill_interval, _a, half_node);
   const struct ra_spill_interval *b =
      rb_node_data(const struct ra_spill_interval, _b, half_node);
   return spill_interval_cmp(a, b);
}

static void
interval_add(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval)
{
   struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
   struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);

   unsigned size = reg_size(interval->interval.reg);
   if (interval->interval.reg->flags & IR3_REG_SHARED) {
      ctx->cur_pressure.shared += size;
   } else {
      if (interval->interval.reg->flags & IR3_REG_HALF) {
         ctx->cur_pressure.half += size;
         if (ctx->spilling) {
            rb_tree_insert(&ctx->half_live_intervals, &interval->half_node,
                           ra_spill_interval_half_cmp);
         }
      }
      if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) {
         ctx->cur_pressure.full += size;
         if (ctx->spilling) {
            rb_tree_insert(&ctx->full_live_intervals, &interval->node,
                           ra_spill_interval_cmp);
         }
      }
   }
}

static void
interval_delete(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval)
{
   struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
   struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);

   unsigned size = reg_size(interval->interval.reg);
   if (interval->interval.reg->flags & IR3_REG_SHARED) {
      ctx->cur_pressure.shared -= size;
   } else {
      if (interval->interval.reg->flags & IR3_REG_HALF) {
         ctx->cur_pressure.half -= size;
         if (ctx->spilling) {
            rb_tree_remove(&ctx->half_live_intervals, &interval->half_node);
         }
      }
      if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) {
         ctx->cur_pressure.full -= size;
         if (ctx->spilling) {
            rb_tree_remove(&ctx->full_live_intervals, &interval->node);
         }
      }
   }
}

static void
interval_readd(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_parent,
               struct ir3_reg_interval *_child)
{
   interval_add(_ctx, _child);
}
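
/* Pressure accounting note: with merged registers a half value counts against
 * both the half and the full totals above, since halves and fulls share one
 * physical register file; with split register files it only counts against
 * the half total. Shared registers are tracked separately in either case.
 */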

static void
spill_ctx_init(struct ra_spill_ctx *ctx, struct ir3_shader_variant *v,
               struct ir3_liveness *live)
{
   ctx->live = live;
   ctx->intervals = ralloc_array(ctx, struct ra_spill_interval *,
                                 ctx->live->definitions_count);
   struct ra_spill_interval *intervals =
      rzalloc_array(ctx, struct ra_spill_interval,
                    ctx->live->definitions_count);
   for (unsigned i = 0; i < ctx->live->definitions_count; i++)
      ctx->intervals[i] = &intervals[i];

   ctx->intervals_count = ctx->live->definitions_count;
   ctx->compiler = v->shader->compiler;
   ctx->merged_regs = v->mergedregs;

   rb_tree_init(&ctx->reg_ctx.intervals);
   ctx->reg_ctx.interval_add = interval_add;
   ctx->reg_ctx.interval_delete = interval_delete;
   ctx->reg_ctx.interval_readd = interval_readd;
}

static void
ra_spill_ctx_insert(struct ra_spill_ctx *ctx,
                    struct ra_spill_interval *interval)
{
   ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval);
}

static void
ra_spill_ctx_remove(struct ra_spill_ctx *ctx,
                    struct ra_spill_interval *interval)
{
   ir3_reg_interval_remove(&ctx->reg_ctx, &interval->interval);
}

static void
init_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
   struct ra_spill_interval *interval = ctx->intervals[dst->name];
   ra_spill_interval_init(interval, dst);
   if (ctx->spilling) {
      interval->next_use_distance = dst->next_use;

      /* We only need to keep track of used-ness if this value may be
       * rematerialized. This also keeps us from nuking things that may be
       * in the keeps list (e.g. atomics, input splits).
       */
      if (interval->can_rematerialize)
         dst->instr->flags |= IR3_INSTR_UNUSED;
   }
}

static void
insert_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
   struct ra_spill_interval *interval = ctx->intervals[dst->name];
   if (interval->interval.inserted)
      return;

   ra_spill_ctx_insert(ctx, interval);
   interval->cant_spill = true;

   /* For precolored inputs, make sure we leave enough registers to allow for
    * holes in the inputs. It can happen that the binning shader has a lower
    * register pressure than the main shader, but the main shader decided to
    * add holes between the inputs which means that the binning shader has a
    * higher register demand.
    */
   if (dst->instr->opc == OPC_META_INPUT && dst->num != INVALID_REG) {
      physreg_t physreg = ra_reg_get_physreg(dst);
      physreg_t max = physreg + reg_size(dst);

      if (interval->interval.reg->flags & IR3_REG_SHARED)
         ctx->max_pressure.shared = MAX2(ctx->max_pressure.shared, max);
      else if (interval->interval.reg->flags & IR3_REG_HALF)
         ctx->max_pressure.half = MAX2(ctx->max_pressure.half, max);
      else
         ctx->max_pressure.full = MAX2(ctx->max_pressure.full, max);
   }
}

static void
insert_src(struct ra_spill_ctx *ctx, struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   if (!interval->interval.inserted) {
      ra_spill_ctx_insert(ctx, interval);
      interval->needs_reload = true;
      interval->already_spilled = true;
   }

   ra_spill_interval_root(interval)->cant_spill = true;
}

static void
remove_src_early(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
                 struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   if (!interval->interval.inserted || interval->interval.parent ||
       !rb_tree_is_empty(&interval->interval.children))
      return;

   ra_spill_ctx_remove(ctx, interval);
}

static void
remove_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
           struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   if (!interval->interval.inserted)
      return;

   ra_spill_ctx_remove(ctx, interval);
}

static void
finish_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
   struct ra_spill_interval *interval = ctx->intervals[dst->name];
   interval->cant_spill = false;
}

static void
remove_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
   struct ra_spill_interval *interval = ctx->intervals[dst->name];

   if (!interval->interval.inserted)
      return;

   ra_spill_ctx_remove(ctx, interval);
}

static void
update_src_next_use(struct ra_spill_ctx *ctx, struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   assert(interval->interval.inserted);

   interval->next_use_distance = src->next_use;

   /* If this node is inserted in one of the trees, then it needs to be resorted
    * as its key has changed.
    */
   if (!interval->interval.parent && !(src->flags & IR3_REG_SHARED)) {
      if (src->flags & IR3_REG_HALF) {
         rb_tree_remove(&ctx->half_live_intervals, &interval->half_node);
         rb_tree_insert(&ctx->half_live_intervals, &interval->half_node,
                        ra_spill_interval_half_cmp);
      }
      if (ctx->merged_regs || !(src->flags & IR3_REG_HALF)) {
         rb_tree_remove(&ctx->full_live_intervals, &interval->node);
         rb_tree_insert(&ctx->full_live_intervals, &interval->node,
                        ra_spill_interval_cmp);
      }
   }
}

static unsigned
get_spill_slot(struct ra_spill_ctx *ctx, struct ir3_register *reg)
{
   if (reg->merge_set) {
      if (reg->merge_set->spill_slot == ~0) {
         reg->merge_set->spill_slot = ALIGN_POT(ctx->spill_slot,
                                                reg->merge_set->alignment);
         ctx->spill_slot = reg->merge_set->spill_slot + reg->merge_set->size * 2;
      }
      return reg->merge_set->spill_slot + reg->merge_set_offset * 2;
   } else {
      if (reg->spill_slot == ~0) {
         reg->spill_slot = ALIGN_POT(ctx->spill_slot, reg_elem_size(reg));
         ctx->spill_slot = reg->spill_slot + reg_size(reg) * 2;
      }
      return reg->spill_slot;
   }
}
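
/* Units in get_spill_slot(): ctx->spill_slot is a byte offset into pvtmem,
 * while reg_size() and merge-set sizes/offsets count half-register
 * components, so the factor of 2 above converts components (2 bytes each)
 * into bytes.
 */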

static void
set_src_val(struct ir3_register *src, const struct reg_or_immed *val)
{
   if (val->flags & IR3_REG_IMMED) {
      src->flags = IR3_REG_IMMED | (val->flags & IR3_REG_HALF);
      src->uim_val = val->uimm;
      src->def = NULL;
   } else if (val->flags & IR3_REG_CONST) {
      src->flags = IR3_REG_CONST | (val->flags & IR3_REG_HALF);
      src->num = val->const_num;
      src->def = NULL;
   } else {
      src->def = val->def;
      val->def->instr->flags &= ~IR3_INSTR_UNUSED;
   }
}

static struct ir3_register *
materialize_pcopy_src(const struct reg_or_immed *src,
                      struct ir3_instruction *instr,
                      struct ir3_block *block)
{
   struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
   struct ir3_register *dst = __ssa_dst(mov);
   dst->flags |= src->flags & IR3_REG_HALF;
   struct ir3_register *mov_src = ir3_src_create(mov, INVALID_REG, src->flags);
   set_src_val(mov_src, src);
   mov->cat1.src_type = mov->cat1.dst_type =
      (src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;

   if (instr)
      ir3_instr_move_before(mov, instr);
   return dst;
}

static void
spill(struct ra_spill_ctx *ctx, const struct reg_or_immed *val,
      unsigned spill_slot, struct ir3_instruction *instr,
      struct ir3_block *block)
{
   struct ir3_register *reg;

   /* If spilling an immed/const pcopy src, we need to actually materialize it
    * first with a mov.
    */
   if (val->flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
      reg = materialize_pcopy_src(val, instr, block);
   } else {
      reg = val->def;
      reg->instr->flags &= ~IR3_INSTR_UNUSED;
   }

   d("spilling ssa_%u:%u to %u", reg->instr->serialno, reg->name,
     spill_slot);

   unsigned elems = reg_elems(reg);
   struct ir3_instruction *spill =
      ir3_instr_create(block, OPC_SPILL_MACRO, 0, 3);
   ir3_src_create(spill, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg;
   unsigned src_flags = reg->flags & (IR3_REG_HALF | IR3_REG_IMMED |
                                      IR3_REG_CONST | IR3_REG_SSA |
                                      IR3_REG_ARRAY);
   struct ir3_register *src = ir3_src_create(spill, INVALID_REG, src_flags);
   ir3_src_create(spill, INVALID_REG, IR3_REG_IMMED)->uim_val = elems;
   spill->cat6.dst_offset = spill_slot;
   spill->cat6.type = (reg->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;

   src->def = reg;
   if (reg->flags & IR3_REG_ARRAY) {
      src->size = reg->size;
      src->array.id = reg->array.id;
      src->array.offset = 0;
   } else {
      src->wrmask = reg->wrmask;
   }

   if (instr)
      ir3_instr_move_before(spill, instr);
}

static void
spill_interval(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval,
               struct ir3_instruction *instr, struct ir3_block *block)
{
   if (interval->can_rematerialize && !interval->interval.reg->merge_set)
      return;

   spill(ctx, &interval->dst, get_spill_slot(ctx, interval->interval.reg),
         instr, block);
}

/* This is similar to "limit" in the paper. */
static void
limit(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
   if (ctx->cur_pressure.half > ctx->limit_pressure.half) {
      d("cur half pressure %u exceeds %u", ctx->cur_pressure.half,
        ctx->limit_pressure.half);
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->half_live_intervals, half_node) {
         d("trying ssa_%u:%u", interval->interval.reg->instr->serialno,
           interval->interval.reg->name);
         if (!interval->cant_spill) {
            if (!interval->already_spilled)
               spill_interval(ctx, interval, instr, instr->block);
            ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
            if (ctx->cur_pressure.half <= ctx->limit_pressure.half)
               break;
         }
      }

      assert(ctx->cur_pressure.half <= ctx->limit_pressure.half);
   }

   if (ctx->cur_pressure.full > ctx->limit_pressure.full) {
      d("cur full pressure %u exceeds %u", ctx->cur_pressure.full,
        ctx->limit_pressure.full);
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->full_live_intervals, node) {
         d("trying ssa_%u:%u", interval->interval.reg->instr->serialno,
           interval->interval.reg->name);
         if (!interval->cant_spill) {
            if (!interval->already_spilled)
               spill_interval(ctx, interval, instr, instr->block);
            ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
            if (ctx->cur_pressure.full <= ctx->limit_pressure.full)
               break;
         } else {
            d("can't spill");
         }
      }

      assert(ctx->cur_pressure.full <= ctx->limit_pressure.full);
   }
}
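
/* cant_spill guards against evicting the very values we are currently making
 * room for: limit() simply skips them, and the asserts document that skipping
 * them must still leave enough candidates to get back under the limit.
 */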

/* There's a corner case where we reload a value which has overlapping live
 * values already reloaded, either because it's the child of some other interval
 * that was already reloaded or some of its children have already been
 * reloaded. Because RA only expects overlapping source/dest intervals for meta
 * instructions (split/collect), and we don't want to add register pressure by
 * creating an entirely separate value, we need to add splits and collects to
 * deal with this case. These splits/collects have to also have correct merge
 * set information, so that it doesn't result in any actual code or register
 * pressure in practice.
 */

static void
add_to_merge_set(struct ir3_merge_set *set, struct ir3_register *def,
                 unsigned offset)
{
   def->merge_set = set;
   def->merge_set_offset = offset;
   def->interval_start = set->interval_start + offset;
   def->interval_end = set->interval_start + offset + reg_size(def);
}

static struct ir3_register *
split(struct ir3_register *def, unsigned offset,
      struct ir3_instruction *after, struct ir3_block *block)
{
   if (reg_elems(def) == 1) {
      assert(offset == 0);
      return def;
   }

   assert(!(def->flags & IR3_REG_ARRAY));
   assert(def->merge_set);
   struct ir3_instruction *split =
      ir3_instr_create(block, OPC_META_SPLIT, 1, 1);
   struct ir3_register *dst = __ssa_dst(split);
   dst->flags |= def->flags & IR3_REG_HALF;
   struct ir3_register *src = ir3_src_create(split, INVALID_REG, def->flags);
   src->wrmask = def->wrmask;
   src->def = def;
   add_to_merge_set(def->merge_set, dst,
                    def->merge_set_offset + offset * reg_elem_size(def));
   if (after)
      ir3_instr_move_before(split, after);
   return dst;
}

static struct ir3_register *
extract(struct ir3_register *parent_def, unsigned offset, unsigned elems,
        struct ir3_instruction *after, struct ir3_block *block)
{
   if (offset == 0 && elems == reg_elems(parent_def))
      return parent_def;

   struct ir3_instruction *collect =
      ir3_instr_create(block, OPC_META_COLLECT, 1, elems);
   struct ir3_register *dst = __ssa_dst(collect);
   dst->flags |= parent_def->flags & IR3_REG_HALF;
   dst->wrmask = MASK(elems);
   add_to_merge_set(parent_def->merge_set, dst, parent_def->merge_set_offset);

   for (unsigned i = 0; i < elems; i++) {
      ir3_src_create(collect, INVALID_REG, parent_def->flags)->def =
         split(parent_def, offset + i, after, block);
   }

   if (after)
      ir3_instr_move_before(collect, after);
   return dst;
}

static struct ir3_register *
reload(struct ra_spill_ctx *ctx, struct ir3_register *reg,
       struct ir3_instruction *after, struct ir3_block *block)
{
   unsigned spill_slot = get_spill_slot(ctx, reg);

   d("reloading ssa_%u:%u from %u", reg->instr->serialno, reg->name,
     spill_slot);

   unsigned elems = reg_elems(reg);
   struct ir3_instruction *reload =
      ir3_instr_create(block, OPC_RELOAD_MACRO, 1, 3);
   struct ir3_register *dst = __ssa_dst(reload);
   dst->flags |= reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
   /* The reload may be split into multiple pieces, and if the destination
    * overlaps with the base register then it could get clobbered before the
    * last ldp in the sequence. Note that we always reserve space for the base
    * register throughout the whole program, so effectively extending its live
    * range past the end of the instruction isn't a problem for our pressure
    * accounting.
    */
   dst->flags |= IR3_REG_EARLY_CLOBBER;
   ir3_src_create(reload, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg;
   struct ir3_register *offset_reg =
      ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED);
   offset_reg->uim_val = spill_slot;
   ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED)->uim_val = elems;
   reload->cat6.type = (reg->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;

   if (reg->flags & IR3_REG_ARRAY) {
      dst->array.offset = 0;
      dst->array.id = reg->array.id;
      dst->size = reg->size;
   } else {
      dst->wrmask = MASK(elems);
   }

   dst->merge_set = reg->merge_set;
   dst->merge_set_offset = reg->merge_set_offset;
   dst->interval_start = reg->interval_start;
   dst->interval_end = reg->interval_end;

   if (after)
      ir3_instr_move_before(reload, after);

   return dst;
}

static void
rewrite_src_interval(struct ra_spill_ctx *ctx,
                     struct ra_spill_interval *interval,
                     struct ir3_register *def,
                     struct ir3_instruction *instr,
                     struct ir3_block *block)
{
   interval->dst.flags = def->flags;
   interval->dst.def = def;
   interval->needs_reload = false;

   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      struct ir3_register *child_reg = child->interval.reg;
      struct ir3_register *child_def =
         extract(def, (child_reg->interval_start -
                       interval->interval.reg->interval_start) /
                    reg_elem_size(def),
                 reg_elems(child_reg), instr, block);
      rewrite_src_interval(ctx, child, child_def, instr, block);
   }
}

static void
reload_def(struct ra_spill_ctx *ctx, struct ir3_register *def,
           struct ir3_instruction *instr, struct ir3_block *block)
{
   unsigned elems = reg_elems(def);
   struct ra_spill_interval *interval = ctx->intervals[def->name];

   struct ir3_reg_interval *ir3_parent = interval->interval.parent;

   if (ir3_parent) {
      struct ra_spill_interval *parent =
         ir3_reg_interval_to_interval(ir3_parent);
      if (!parent->needs_reload) {
         interval->dst.flags = def->flags;
         interval->dst.def = extract(
            parent->dst.def,
            (def->interval_start - parent->dst.def->interval_start) /
               reg_elem_size(def),
            elems, instr, block);
         return;
      }
   }

   struct ir3_register *dst;
   if (interval->can_rematerialize)
      dst = rematerialize(def, instr, block);
   else
      dst = reload(ctx, def, instr, block);

   rewrite_src_interval(ctx, interval, dst, instr, block);
}

static void
reload_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
           struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   if (interval->needs_reload) {
      reload_def(ctx, src->def, instr, instr->block);
   }

   ra_spill_interval_root(interval)->cant_spill = false;
}

static void
rewrite_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
            struct ir3_register *src)
{
   struct ra_spill_interval *interval = ctx->intervals[src->def->name];

   set_src_val(src, &interval->dst);
}

static void
update_max_pressure(struct ra_spill_ctx *ctx)
{
   d("pressure:");
   d("\tfull: %u", ctx->cur_pressure.full);
   d("\thalf: %u", ctx->cur_pressure.half);
   d("\tshared: %u", ctx->cur_pressure.shared);

   ctx->max_pressure.full =
      MAX2(ctx->max_pressure.full, ctx->cur_pressure.full);
   ctx->max_pressure.half =
      MAX2(ctx->max_pressure.half, ctx->cur_pressure.half);
   ctx->max_pressure.shared =
      MAX2(ctx->max_pressure.shared, ctx->cur_pressure.shared);
}

static void
handle_instr(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
   ra_foreach_dst (dst, instr) {
      init_dst(ctx, dst);
   }

   if (ctx->spilling) {
      ra_foreach_src (src, instr)
         insert_src(ctx, src);
   }

   /* Handle tied and early-kill destinations. If a destination is tied to a
    * source and that source is live-through, then we need to allocate a new
    * register for the destination which is live-through itself and cannot
    * overlap the sources. Similarly early-kill destinations cannot overlap
    * sources.
    */

   ra_foreach_dst (dst, instr) {
      struct ir3_register *tied_src = dst->tied;
      if ((tied_src && !(tied_src->flags & IR3_REG_FIRST_KILL)) ||
          (dst->flags & IR3_REG_EARLY_CLOBBER))
         insert_dst(ctx, dst);
   }

   if (ctx->spilling)
      limit(ctx, instr);
   else
      update_max_pressure(ctx);

   if (ctx->spilling) {
      ra_foreach_src (src, instr) {
         reload_src(ctx, instr, src);
         update_src_next_use(ctx, src);
      }
   }

   ra_foreach_src (src, instr) {
      if (src->flags & IR3_REG_FIRST_KILL)
         remove_src_early(ctx, instr, src);
   }

   ra_foreach_dst (dst, instr) {
      insert_dst(ctx, dst);
   }

   if (ctx->spilling)
      limit(ctx, instr);
   else
      update_max_pressure(ctx);

   /* We have to remove sources before rewriting them so that we can lookup the
    * interval to remove before the source itself is changed.
    */
   ra_foreach_src (src, instr) {
      if (src->flags & IR3_REG_FIRST_KILL)
         remove_src(ctx, instr, src);
   }

   if (ctx->spilling) {
      ra_foreach_src (src, instr) {
         rewrite_src(ctx, instr, src);
      }
   }

   ra_foreach_dst (dst, instr) {
      finish_dst(ctx, dst);
   }

   for (unsigned i = 0; i < instr->dsts_count; i++) {
      if (ra_reg_is_dst(instr->dsts[i]) &&
          (instr->dsts[i]->flags & IR3_REG_UNUSED))
         remove_dst(ctx, instr->dsts[i]);
   }
}
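
/* A summary of the ordering above: sources are made resident (and room made
 * for them) before destinations are inserted, and killed sources and unused
 * defs are only dropped at the end, so the pressure peak at this instruction
 * is observed exactly once -- in both the measuring (update_max_pressure) and
 * spilling (limit) modes of this pass.
 */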

static struct ra_spill_interval *
create_temp_interval(struct ra_spill_ctx *ctx, struct ir3_register *def)
{
   unsigned name = ctx->intervals_count++;
   unsigned offset = ctx->live->interval_offset;

   /* This is kinda hacky, but we need to create a fake SSA def here that is
    * only used as part of the pcopy accounting. See below.
    */
   struct ir3_register *reg = rzalloc(ctx, struct ir3_register);
   *reg = *def;
   reg->name = name;
   reg->interval_start = offset;
   reg->interval_end = offset + reg_size(def);
   reg->merge_set = NULL;

   ctx->intervals = reralloc(ctx, ctx->intervals, struct ra_spill_interval *,
                             ctx->intervals_count);
   struct ra_spill_interval *interval = rzalloc(ctx, struct ra_spill_interval);
   ra_spill_interval_init(interval, reg);
   ctx->intervals[name] = interval;
   ctx->live->interval_offset += reg_size(def);
   return interval;
}

/* In the sequence of copies generated (see below), would this source be
 * killed?
 */
static bool
is_last_pcopy_src(struct ir3_instruction *pcopy, unsigned src_n)
{
   struct ir3_register *src = pcopy->srcs[src_n];
   if (!(src->flags & IR3_REG_KILL))
      return false;
   for (unsigned j = src_n + 1; j < pcopy->srcs_count; j++) {
      if (pcopy->srcs[j]->def == src->def)
         return false;
   }
   return true;
}

/* Parallel copies are different from normal instructions. The sources together
 * may be larger than the entire register file, so we cannot just reload every
 * source like normal, and indeed that probably wouldn't be a great idea.
 * Instead we essentially need to lower the parallel copy to "copies," just like
 * in the normal CSSA construction, although we implement the copies by
 * reloading and then possibly spilling values. We essentially just shuffle
 * around the sources until each source either (a) is live or (b) has the same
 * spill slot as its corresponding destination. We do this by decomposing the
 * copy into a series of copies, so:
 *
 *    a, b, c = d, e, f
 *
 * becomes:
 *
 *    d' = d
 *    e' = e
 *    f' = f
 *    a = d'
 *    b = e'
 *    c = f'
 *
 * the temporary SSA values d', e', and f' never actually show up in the result.
 * They are only used for our internal accounting. They may, however, have their
 * own spill slot created for them. Similarly, we don't actually emit any copy
 * instructions, although we emit the spills/reloads that *would've* been
 * required if those copies were there.
 *
 * TODO: in order to reduce the number of temporaries and therefore spill slots,
 * we could instead do a more complicated analysis that considers the location
 * transfer graph.
 *
 * In addition, we actually remove the parallel copy and rewrite all its uses
 * (in the phi nodes) rather than rewrite its sources at the end. Recreating it
 * later turns out to be easier than keeping it up-to-date throughout this pass,
 * since we may have to remove entries for phi sources that are spilled and add
 * entries for live-outs that are spilled and reloaded, which can happen here
 * and then possibly be undone or done again when processing live-ins of the
 * successor block.
 */

static void
handle_pcopy(struct ra_spill_ctx *ctx, struct ir3_instruction *pcopy)
{
   foreach_dst (dst, pcopy) {
      struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
      ra_spill_interval_init(dst_interval, dst);
   }

   foreach_src_n (src, i, pcopy) {
      d("processing src %u", i);
      struct ir3_register *dst = pcopy->dsts[i];

      /* Skip the intermediate copy for cases where the source is merged with
       * the destination. Crucially this means that we also don't reload/spill
       * it if it's been spilled, because it shares the same spill slot.
       */
      if (src->def && src->def->merge_set &&
          src->def->merge_set == dst->merge_set &&
          src->def->merge_set_offset == dst->merge_set_offset) {
         struct ra_spill_interval *src_interval = ctx->intervals[src->def->name];
         struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
         if (src_interval->interval.inserted) {
            update_src_next_use(ctx, src);
            if (is_last_pcopy_src(pcopy, i))
               ra_spill_ctx_remove(ctx, src_interval);
            dst_interval->cant_spill = true;
            ra_spill_ctx_insert(ctx, dst_interval);
            limit(ctx, pcopy);
            dst_interval->cant_spill = false;
            dst_interval->dst = src_interval->dst;
         }
      } else if (src->def) {
         struct ra_spill_interval *temp_interval =
            create_temp_interval(ctx, dst);
         struct ir3_register *temp = temp_interval->interval.reg;
         temp_interval->next_use_distance = src->next_use;

         insert_src(ctx, src);
         limit(ctx, pcopy);
         reload_src(ctx, pcopy, src);
         update_src_next_use(ctx, src);
         if (is_last_pcopy_src(pcopy, i))
            remove_src(ctx, pcopy, src);
         struct ra_spill_interval *src_interval =
            ctx->intervals[src->def->name];
         temp_interval->dst = src_interval->dst;

         temp_interval->cant_spill = true;
         ra_spill_ctx_insert(ctx, temp_interval);
         limit(ctx, pcopy);
         temp_interval->cant_spill = false;

         src->flags = temp->flags;
         src->def = temp;
      }
   }

   d("done with pcopy srcs");

   foreach_src_n (src, i, pcopy) {
      struct ir3_register *dst = pcopy->dsts[i];

      if (src->def && src->def->merge_set &&
          src->def->merge_set == dst->merge_set &&
          src->def->merge_set_offset == dst->merge_set_offset)
         continue;

      struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];

      if (!src->def) {
         dst_interval->cant_spill = true;
         ra_spill_ctx_insert(ctx, dst_interval);
         limit(ctx, pcopy);
         dst_interval->cant_spill = false;

         assert(src->flags & (IR3_REG_CONST | IR3_REG_IMMED));
         if (src->flags & IR3_REG_CONST) {
            dst_interval->dst.flags = src->flags;
            dst_interval->dst.const_num = src->num;
         } else {
            dst_interval->dst.flags = src->flags;
            dst_interval->dst.uimm = src->uim_val;
         }
      } else {
         struct ra_spill_interval *temp_interval = ctx->intervals[src->def->name];

         insert_src(ctx, src);
         limit(ctx, pcopy);
         reload_src(ctx, pcopy, src);
         remove_src(ctx, pcopy, src);

         dst_interval->dst = temp_interval->dst;
         ra_spill_ctx_insert(ctx, dst_interval);
      }
   }

   pcopy->flags |= IR3_INSTR_UNUSED;
}
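
/* The two loops above implement the two halves of the decomposition described
 * in the big comment: the first loop performs the "temp = src" copies,
 * reloading each source and parking its value in a temporary interval, and
 * the second performs the "dst = temp" copies, transferring each temporary's
 * mapping to the real destination. Immediate/const sources only participate
 * in the second loop since they need no reload.
 */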

static void
handle_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
   init_dst(ctx, instr->dsts[0]);
   insert_dst(ctx, instr->dsts[0]);
   finish_dst(ctx, instr->dsts[0]);
}

static void
remove_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
   if (instr->opc == OPC_META_TEX_PREFETCH) {
      ra_foreach_src (src, instr)
         remove_src(ctx, instr, src);
   }
   if (instr->dsts[0]->flags & IR3_REG_UNUSED)
      remove_dst(ctx, instr->dsts[0]);
}

static void
handle_live_in(struct ra_spill_ctx *ctx, struct ir3_block *block,
               struct ir3_register *def)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   ra_spill_interval_init(interval, def);
   if (ctx->spilling) {
      interval->next_use_distance =
         ctx->blocks[block->index].next_use_start[def->name];
   }

   ra_spill_ctx_insert(ctx, interval);
}

static bool
is_live_in_phi(struct ir3_register *def, struct ir3_block *block)
{
   return def->instr->opc == OPC_META_PHI && def->instr->block == block;
}

static bool
is_live_in_pred(struct ra_spill_ctx *ctx, struct ir3_register *def,
                struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];
   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
      if (!def)
         return false;
   }

   return _mesa_hash_table_search(state->remap, def);
}

static bool
is_live_in_undef(struct ir3_register *def,
                 struct ir3_block *block, unsigned pred_idx)
{
   if (!is_live_in_phi(def, block))
      return false;

   return !def->instr->srcs[pred_idx]->def;
}

static struct reg_or_immed *
read_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
             struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];

   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
      if (!def)
         return NULL;
   }

   struct hash_entry *entry = _mesa_hash_table_search(state->remap, def);
   if (entry)
      return entry->data;
   else
      return NULL;
}

static bool
is_live_in_all_preds(struct ra_spill_ctx *ctx, struct ir3_register *def,
                     struct ir3_block *block)
{
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      if (!is_live_in_pred(ctx, def, block, i))
         return false;
   }

   return true;
}

static void
spill_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
              struct ir3_block *block)
{
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];

      if (!state->visited)
         continue;

      struct reg_or_immed *pred_def = read_live_in(ctx, def, block, i);
      if (pred_def) {
         spill(ctx, pred_def, get_spill_slot(ctx, def), NULL, pred);
      }
   }
}

static void
spill_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   bool all_preds_visited = true;
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      if (!state->visited) {
         all_preds_visited = false;
         break;
      }
   }

   /* Note: in the paper they explicitly spill live-through values first, but we
    * should be doing that automatically by virtue of picking the largest
    * distance due to the extra distance added to edges out of loops.
    *
    * TODO: Keep track of pressure in each block and preemptively spill
    * live-through values as described in the paper to avoid spilling them
    * inside the loop.
    */

   if (ctx->cur_pressure.half > ctx->limit_pressure.half) {
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->half_live_intervals, half_node) {
         if (all_preds_visited &&
             is_live_in_all_preds(ctx, interval->interval.reg, block))
            continue;
         if (interval->interval.reg->merge_set ||
             !interval->can_rematerialize)
            spill_live_in(ctx, interval->interval.reg, block);
         ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
         if (ctx->cur_pressure.half <= ctx->limit_pressure.half)
            break;
      }
   }

   if (ctx->cur_pressure.full > ctx->limit_pressure.full) {
      rb_tree_foreach_safe (struct ra_spill_interval, interval,
                            &ctx->full_live_intervals, node) {
         if (all_preds_visited &&
             is_live_in_all_preds(ctx, interval->interval.reg, block))
            continue;
         spill_live_in(ctx, interval->interval.reg, block);
         ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
         if (ctx->cur_pressure.full <= ctx->limit_pressure.full)
            break;
      }
   }
}

static void
live_in_rewrite(struct ra_spill_ctx *ctx,
                struct ra_spill_interval *interval,
                struct reg_or_immed *new_val,
                struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];
   struct ir3_register *def = interval->interval.reg;
   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
   }

   if (def)
      _mesa_hash_table_insert(state->remap, def, new_val);

   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      assert(new_val->flags & IR3_REG_SSA);
      struct ir3_register *child_def =
         extract(new_val->def,
                 (child->interval.reg->interval_start - def->interval_start) /
                 reg_elem_size(def), reg_elems(child->interval.reg),
                 NULL, pred);
      struct reg_or_immed *child_val = ralloc(ctx, struct reg_or_immed);
      child_val->def = child_def;
      child_val->flags = child_def->flags;
      live_in_rewrite(ctx, child, child_val, block, pred_idx);
   }
}

static void
reload_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
               struct ir3_block *block)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      if (!state->visited)
         continue;

      if (is_live_in_undef(def, block, i))
         continue;

      struct reg_or_immed *new_val = read_live_in(ctx, def, block, i);

      if (!new_val) {
         new_val = ralloc(ctx, struct reg_or_immed);
         if (interval->can_rematerialize)
            new_val->def = rematerialize(def, NULL, pred);
         else
            new_val->def = reload(ctx, def, NULL, pred);
         new_val->flags = new_val->def->flags;
      }
      live_in_rewrite(ctx, interval, new_val, block, i);
   }
}

static void
reload_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals,
                    interval.node) {
      reload_live_in(ctx, interval->interval.reg, block);
   }
}
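
/* Resolve a live-in interval to a single value now that the predecessors'
 * remaps are known: if every visited predecessor agrees on one SSA def, use
 * it directly; otherwise insert a phi at the top of the block. Sources for
 * not-yet-visited predecessors (back edges) temporarily point at the original
 * def and are fixed up later by update_live_out_phis().
 */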
static void
add_live_in_phi(struct ra_spill_ctx *ctx, struct ir3_register *def,
                struct ir3_block *block)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   if (!interval->interval.inserted)
      return;

   bool needs_phi = false;
   struct ir3_register *cur_def = NULL;
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];

      if (!state->visited) {
         needs_phi = true;
         break;
      }

      struct hash_entry *entry =
         _mesa_hash_table_search(state->remap, def);
      assert(entry);
      struct reg_or_immed *pred_val = entry->data;
      if ((pred_val->flags & (IR3_REG_IMMED | IR3_REG_CONST)) ||
          !pred_val->def ||
          (cur_def && cur_def != pred_val->def)) {
         needs_phi = true;
         break;
      }
      cur_def = pred_val->def;
   }

   if (!needs_phi) {
      interval->dst.def = cur_def;
      interval->dst.flags = cur_def->flags;
      return;
   }

   struct ir3_instruction *phi =
      ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count);
   struct ir3_register *dst = __ssa_dst(phi);
   dst->flags |= def->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
   dst->size = def->size;
   dst->wrmask = def->wrmask;

   dst->interval_start = def->interval_start;
   dst->interval_end = def->interval_end;
   dst->merge_set = def->merge_set;
   dst->merge_set_offset = def->merge_set_offset;

   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      struct ir3_register *src = ir3_src_create(phi, INVALID_REG, dst->flags);
      src->size = def->size;
      src->wrmask = def->wrmask;

      if (state->visited) {
         struct hash_entry *entry =
            _mesa_hash_table_search(state->remap, def);
         assert(entry);
         struct reg_or_immed *new_val = entry->data;
         set_src_val(src, new_val);
      } else {
         src->def = def;
      }
   }

   interval->dst.def = dst;
   interval->dst.flags = dst->flags;

   ir3_instr_move_before_block(phi, block);
}

/* When spilling a block with a single predecessor, the predecessor may have
 * other successors, so we can't choose what's live in and we can't
 * spill/restore anything. Just make the inserted intervals exactly match the
 * predecessor. If a value wasn't live in the predecessor then it must've
 * already been spilled. Also, there are no phi nodes and no live-ins to
 * rewrite.
 */
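
/* A sketch of the shape this handles (hypothetical CFG, for illustration):
 *
 *    pred:          // branches to both "block" and "other", so we can't
 *       x = ...     // insert spills/reloads at its end for the benefit of
 *       ...         // just one successor
 *    block:         // single predecessor: inherit x's state (resident or
 *       use(x)      // spilled) exactly as "pred" left it
 */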
static void
spill_single_pred_live_in(struct ra_spill_ctx *ctx,
                          struct ir3_block *block)
{
   unsigned name;
   BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                       ctx->live->definitions_count) {
      struct ir3_register *reg = ctx->live->definitions[name];
      struct ra_spill_interval *interval = ctx->intervals[reg->name];
      struct reg_or_immed *val = read_live_in(ctx, reg, block, 0);
      if (val)
         interval->dst = *val;
      else
         ra_spill_ctx_remove(ctx, interval);
   }
}

static void
rewrite_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *phi,
            struct ir3_block *block)
{
   if (!ctx->intervals[phi->dsts[0]->name]->interval.inserted) {
      phi->flags |= IR3_INSTR_UNUSED;
      return;
   }

   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];

      if (!state->visited)
         continue;

      struct ir3_register *src = phi->srcs[i];
      if (!src->def)
         continue;

      struct hash_entry *entry =
         _mesa_hash_table_search(state->remap, src->def);
      assert(entry);
      struct reg_or_immed *new_val = entry->data;
      set_src_val(src, new_val);
   }
}

static void
spill_live_out(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval,
               struct ir3_block *block)
{
   struct ir3_register *def = interval->interval.reg;

   if (interval->interval.reg->merge_set ||
       !interval->can_rematerialize)
      spill(ctx, &interval->dst, get_spill_slot(ctx, def), NULL, block);
   ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
}
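
/* The next few functions handle a back edge, where the successor (the loop
 * header) has already been visited: bring our live-outs in line with what the
 * header recorded, spilling values it doesn't expect and reloading values it
 * does.
 */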
static void
spill_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   rb_tree_foreach_safe (struct ra_spill_interval, interval,
                         &ctx->reg_ctx.intervals, interval.node) {
      if (!BITSET_TEST(state->live_out, interval->interval.reg->name)) {
         spill_live_out(ctx, interval, block);
      }
   }
}

static void
reload_live_out(struct ra_spill_ctx *ctx, struct ir3_register *def,
                struct ir3_block *block)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval);

   reload_def(ctx, def, NULL, block);
}

static void
reload_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   unsigned name;
   BITSET_FOREACH_SET (name, state->live_out, ctx->live->definitions_count) {
      struct ir3_register *reg = ctx->live->definitions[name];
      struct ra_spill_interval *interval = ctx->intervals[name];
      if (!interval->interval.inserted)
         reload_live_out(ctx, reg, block);
   }
}

static void
update_live_out_phis(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   assert(!block->successors[1]);
   struct ir3_block *succ = block->successors[0];
   unsigned pred_idx = ir3_block_get_pred_index(succ, block);

   foreach_instr (instr, &succ->instr_list) {
      if (instr->opc != OPC_META_PHI)
         break;

      struct ir3_register *def = instr->srcs[pred_idx]->def;
      if (!def)
         continue;

      struct ra_spill_interval *interval = ctx->intervals[def->name];
      if (!interval->interval.inserted)
         continue;
      set_src_val(instr->srcs[pred_idx], &interval->dst);
   }
}
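
/* For predecessors that haven't been visited yet (back edges), record which
 * values we expect to be live across the edge, so that when the predecessor
 * is eventually processed we know what to spill or reload at its end.
 */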
static void
record_pred_live_out(struct ra_spill_ctx *ctx,
                     struct ra_spill_interval *interval,
                     struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];

   struct ir3_register *def = interval->interval.reg;
   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
   }
   BITSET_SET(state->live_out, def->name);

   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      record_pred_live_out(ctx, child, block, pred_idx);
   }
}

static void
record_pred_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      if (state->visited)
         continue;

      state->live_out = rzalloc_array(ctx, BITSET_WORD,
                                      BITSET_WORDS(ctx->live->definitions_count));

      rb_tree_foreach (struct ra_spill_interval, interval,
                       &ctx->reg_ctx.intervals, interval.node) {
         record_pred_live_out(ctx, interval, block, i);
      }
   }
}

static void
record_live_out(struct ra_spill_ctx *ctx,
                struct ra_spill_block_state *state,
                struct ra_spill_interval *interval)
{
   if (!(interval->dst.flags & IR3_REG_SSA) ||
       interval->dst.def) {
      struct reg_or_immed *val = ralloc(ctx, struct reg_or_immed);
      *val = interval->dst;
      _mesa_hash_table_insert(state->remap, interval->interval.reg, val);
   }
   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      record_live_out(ctx, state, child);
   }
}

static void
record_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   state->remap = _mesa_pointer_hash_table_create(ctx);

   rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals,
                    interval.node) {
      record_live_out(ctx, state, interval);
   }
}
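
/* Process one block: set up the live-in interval state, insert the
 * block-boundary spills/reloads and phi rewrites (when spilling), walk the
 * instructions, and finally record the resulting state for our successors.
 */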
static void
handle_block(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   memset(&ctx->cur_pressure, 0, sizeof(ctx->cur_pressure));
   rb_tree_init(&ctx->reg_ctx.intervals);
   rb_tree_init(&ctx->full_live_intervals);
   rb_tree_init(&ctx->half_live_intervals);

   unsigned name;
   BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                       ctx->live->definitions_count) {
      struct ir3_register *reg = ctx->live->definitions[name];
      handle_live_in(ctx, block, reg);
   }

   foreach_instr (instr, &block->instr_list) {
      if (instr->opc != OPC_META_PHI && instr->opc != OPC_META_INPUT &&
          instr->opc != OPC_META_TEX_PREFETCH)
         break;
      handle_input_phi(ctx, instr);
   }

   if (ctx->spilling) {
      if (block->predecessors_count == 1) {
         spill_single_pred_live_in(ctx, block);
      } else {
         spill_live_ins(ctx, block);
         reload_live_ins(ctx, block);
         record_pred_live_outs(ctx, block);
         foreach_instr (instr, &block->instr_list) {
            if (instr->opc != OPC_META_PHI)
               break;
            rewrite_phi(ctx, instr, block);
         }
         BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                             ctx->live->definitions_count) {
            struct ir3_register *reg = ctx->live->definitions[name];
            add_live_in_phi(ctx, reg, block);
         }
      }
   } else {
      update_max_pressure(ctx);
   }

   foreach_instr (instr, &block->instr_list) {
      di(instr, "processing");

      if (instr->opc == OPC_META_PHI || instr->opc == OPC_META_INPUT ||
          instr->opc == OPC_META_TEX_PREFETCH)
         remove_input_phi(ctx, instr);
      else if (ctx->spilling && instr->opc == OPC_META_PARALLEL_COPY)
         handle_pcopy(ctx, instr);
      else if (ctx->spilling && instr->opc == OPC_MOV &&
               instr->dsts[0] == ctx->base_reg)
         /* skip */;
      else
         handle_instr(ctx, instr);
   }

   if (ctx->spilling && block->successors[0]) {
      struct ra_spill_block_state *state =
         &ctx->blocks[block->successors[0]->index];
      if (state->visited) {
         assert(!block->successors[1]);

         spill_live_outs(ctx, block);
         reload_live_outs(ctx, block);
         update_live_out_phis(ctx, block);
      }
   }

   if (ctx->spilling) {
      record_live_outs(ctx, block);
      ctx->blocks[block->index].visited = true;
   }
}
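
/* A phi is redundant if all of its sources (ignoring self-references) are the
 * same non-undef value. Record that value in phi->data and mark the phi
 * unused so that uses of it can be rewritten to point at the value directly.
 */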
static bool
simplify_phi_node(struct ir3_instruction *phi)
{
   struct ir3_register *def = NULL;
   foreach_src (src, phi) {
      /* Ignore phi sources which point to the phi itself. */
      if (src->def == phi->dsts[0])
         continue;
      /* If it's undef or it doesn't match the previous sources, bail. */
      if (!src->def || (def && def != src->def))
         return false;
      def = src->def;
   }

   phi->data = def;
   phi->flags |= IR3_INSTR_UNUSED;
   return true;
}

static struct ir3_register *
simplify_phi_def(struct ir3_register *def)
{
   if (def->instr->opc == OPC_META_PHI) {
      struct ir3_instruction *phi = def->instr;

      /* Note: this function is always called at least once after visiting the
       * phi, so either there has been a simplified phi in the meantime, in
       * which case we will set progress=true and visit the definition again,
       * or phi->data already has the most up-to-date value. Therefore we
       * don't have to recursively check phi->data.
       */
      if (phi->data)
         return phi->data;
   }

   return def;
}

static void
simplify_phi_srcs(struct ir3_instruction *instr)
{
   foreach_src (src, instr) {
      if (src->def)
         src->def = simplify_phi_def(src->def);
   }
}

/* We insert phi nodes for all live-ins of loops in case we need to split the
 * live range. This pass cleans that up for the case where the live range
 * didn't actually need to be split.
 */
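
/* For example (a hypothetical loop): if the live range of a loop live-in "x0"
 * never actually got split, the phi we inserted for it folds away:
 *
 *    header:                        header:
 *       x1 = phi(x0, x1)    ==>        (phi removed)
 *       ...                            ...
 *       use(x1)                        use(x0)
 */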
static void
simplify_phi_nodes(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc != OPC_META_PHI)
            break;
         instr->data = NULL;
      }
   }

   bool progress;
   do {
      progress = false;
      foreach_block (block, &ir->block_list) {
         foreach_instr (instr, &block->instr_list) {
            if (instr->opc == OPC_META_PHI || (instr->flags & IR3_INSTR_UNUSED))
               continue;

            simplify_phi_srcs(instr);
         }

         /* Visit phi nodes in the successors to make sure that phi sources
          * are always visited at least once after visiting the definition
          * they point to. See note in simplify_phi_def() for why this is
          * necessary.
          */
         for (unsigned i = 0; i < 2; i++) {
            struct ir3_block *succ = block->successors[i];
            if (!succ)
               continue;
            foreach_instr (instr, &succ->instr_list) {
               if (instr->opc != OPC_META_PHI)
                  break;
               if (instr->flags & IR3_INSTR_UNUSED) {
                  if (instr->data)
                     instr->data = simplify_phi_def(instr->data);
               } else {
                  simplify_phi_srcs(instr);
                  progress |= simplify_phi_node(instr);
               }
            }
         }
      }
   } while (progress);
}

static void
unmark_dead(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         instr->flags &= ~IR3_INSTR_UNUSED;
      }
   }
}

/* Simple pass to remove now-dead phi nodes and pcopy instructions. We mark
 * which ones are dead along the way, so there's nothing to compute here.
 */
static void
cleanup_dead(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->flags & IR3_INSTR_UNUSED)
            list_delinit(&instr->node);
      }
   }
}

/* Deal with merge sets after spilling. Spilling generally leaves the merge
 * sets in a mess, and even if we properly cleaned up after ourselves, we
 * would want to recompute the merge sets afterward anyway. That's because
 * spilling/reloading can "break up" phi webs and split/collect webs so that
 * allocating them to the same register no longer gives any benefit. For
 * example, imagine we have this:
 *
 * if (...) {
 *    foo = ...
 * } else {
 *    bar = ...
 * }
 * baz = phi(foo, bar)
 *
 * and we spill "baz":
 *
 * if (...) {
 *    foo = ...
 *    spill(foo)
 * } else {
 *    bar = ...
 *    spill(bar)
 * }
 * baz = reload()
 *
 * now foo, bar, and baz don't have to be allocated to the same register. How
 * exactly the merge sets change can be complicated, so it's easier just to
 * recompute them.
 *
 * However, there's a wrinkle in this: those same merge sets determine the
 * register pressure, due to multiple values inhabiting the same register! And
 * we assume that this sharing happens when spilling. Therefore we need a
 * three-step procedure:
 *
 * 1. Drop the original merge sets.
 * 2. Calculate which values *must* be merged, being careful to only use the
 *    interval information which isn't trashed by spilling, and forcibly merge
 *    them.
 * 3. Let ir3_merge_regs() finish the job, including recalculating the
 *    intervals.
 */

static void
fixup_merge_sets(struct ir3_liveness *live, struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         ra_foreach_dst (dst, instr) {
            dst->merge_set = NULL;
            dst->merge_set_offset = 0;
         }
      }
   }

   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc != OPC_META_SPLIT &&
             instr->opc != OPC_META_COLLECT)
            continue;

         struct ir3_register *dst = instr->dsts[0];
         ra_foreach_src (src, instr) {
            if (!(src->flags & IR3_REG_KILL) &&
                src->def->interval_start < dst->interval_end &&
                dst->interval_start < src->def->interval_end) {
               ir3_force_merge(dst, src->def,
                               src->def->interval_start - dst->interval_start);
            }
         }
      }
   }

   ir3_merge_regs(live, ir);
}

void
ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live,
                  struct ir3_pressure *max_pressure)
{
   struct ra_spill_ctx *ctx = rzalloc(NULL, struct ra_spill_ctx);
   spill_ctx_init(ctx, v, live);

   foreach_block (block, &v->ir->block_list) {
      handle_block(ctx, block);
   }

   assert(ctx->cur_pressure.full == 0);
   assert(ctx->cur_pressure.half == 0);
   assert(ctx->cur_pressure.shared == 0);

   *max_pressure = ctx->max_pressure;
   ralloc_free(ctx);
}
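
/* Spill values to private memory until the pressure in each block fits within
 * limit_pressure, then clean up: fold away phis that turned out to be
 * redundant, delete dead instructions, re-insert parallel copies, recompute
 * liveness (replacing *live), and rebuild the merge sets.
 */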
bool
ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v,
          struct ir3_liveness **live,
          const struct ir3_pressure *limit_pressure)
{
   void *mem_ctx = ralloc_parent(*live);
   struct ra_spill_ctx *ctx = rzalloc(mem_ctx, struct ra_spill_ctx);
   spill_ctx_init(ctx, v, *live);

   ctx->spilling = true;

   ctx->blocks = rzalloc_array(ctx, struct ra_spill_block_state,
                               ctx->live->block_count);
   rb_tree_init(&ctx->full_live_intervals);
   rb_tree_init(&ctx->half_live_intervals);

   ctx->limit_pressure = *limit_pressure;
   ctx->spill_slot = v->pvtmem_size;

   add_base_reg(ctx, ir);
   compute_next_distance(ctx, ir);

   unmark_dead(ir);

   foreach_block (block, &ir->block_list) {
      handle_block(ctx, block);
   }

   simplify_phi_nodes(ir);

   cleanup_dead(ir);

   ir3_create_parallel_copies(ir);

   /* After this point, we're done mutating the IR. Liveness has been trashed,
    * so recalculate it. We'll need it for recalculating the merge sets.
    */
   ralloc_free(ctx->live);
   *live = ir3_calc_liveness(mem_ctx, ir);

   fixup_merge_sets(*live, ir);

   v->pvtmem_size = ctx->spill_slot;
   ralloc_free(ctx);

   return true;
}