/*
|
|
* Copyright (C) 2021 Valve Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
#include "util/ralloc.h"
|
|
#include "ir3_ra.h"
|
|
#include "ir3_shader.h"
|
|
|
|
/* This file implements a validation pass for register allocation. We check
|
|
* that the assignment of SSA values to registers is "valid", in the sense
|
|
* that each original definition reaches all of its uses without being
|
|
* clobbered by something else.
|
|
*
|
|
* The validation is a forward dataflow analysis. The state at each point
|
|
* consists of, for each physical register, the SSA value occupying it, or a
|
|
* few special values:
|
|
*
|
|
* - "unknown" is set initially, before the dataflow analysis assigns it a
|
|
* value. This is the lattice bottom.
|
|
* - Values at the start get "undef", which acts like a special SSA value that
|
|
* indicates it is never written.
|
|
* - "overdefined" registers are set to more than one value, depending on
|
|
* which path you take to get to the spot. This is the lattice top.
|
|
*
|
|
* Overdefined is necessary to distinguish because in some programs, like this
|
|
* simple example, it's perfectly normal and allowed:
|
|
*
|
|
* if (...) {
|
|
* mov.u32u32 ssa_1(r1.x), ...
|
|
* ...
|
|
* } else {
|
|
* mov.u32u32 ssa_2(r1.x), ...
|
|
* ...
|
|
* }
|
|
* // r1.x is overdefined here!
|
|
*
|
|
* However, if an ssa value after the if is accidentally assigned to r1.x, we
|
|
* need to remember that it's invalid to catch the mistake. Overdef has to be
|
|
* distinguished from undef so that the state forms a valid lattice to
|
|
* guarantee that the analysis always terminates. We could avoid relying on
|
|
* overdef by using liveness analysis, but not relying on liveness has the
|
|
* benefit that we can catch bugs in liveness analysis too.
|
|
*
|
|
* One tricky thing we have to handle is the coalescing of splits/collects,
|
|
* which means that multiple SSA values can occupy a register at the same
|
|
* time. While we could use the same merge set indices that RA uses, again
|
|
* that would rely on the merge set calculation being correct which we don't
|
|
* want to. Instead we treat splits/collects as transfer instructions, similar
|
|
* to the parallelcopy instructions inserted by RA, and have them copy their
|
|
* sources to their destinations. This means that each physreg must carry the
|
|
* SSA def assigned to it plus an offset into that definition, and when
|
|
* validating sources we must look through splits/collects to find the
|
|
* "original" source for each subregister.
|
|
*/
|
|
|
|
/* Sentinel values for reg_state::def. These are distinguished from real
 * ir3_register pointers purely by pointer comparison, so small integer
 * constants are safe stand-ins. See the file comment for the lattice.
 */
#define UNKNOWN ((struct ir3_register *)NULL)              /* lattice bottom: not yet computed */
#define UNDEF ((struct ir3_register *)(uintptr_t)1)        /* never written on any path */
#define OVERDEF ((struct ir3_register *)(uintptr_t)2)      /* lattice top: conflicting defs reach here */

/* Per-physreg dataflow fact: which SSA def (or sentinel) occupies this
 * subregister, and the offset into that def (in subregister units).
 */
struct reg_state {
   struct ir3_register *def;
   unsigned offset;
};

/* State for one whole register file (full/half/shared). */
struct file_state {
   struct reg_state regs[RA_MAX_FILE_SIZE];
};

/* Complete reaching-definitions state at a program point. */
struct reaching_state {
   struct file_state half, full, shared;
};

struct ra_val_ctx {
   /* Instruction currently being validated, for error reporting. */
   struct ir3_instruction *current_instr;

   /* Working state while walking a block. */
   struct reaching_state reaching;
   /* Per-block state at block entry, indexed by block->index. */
   struct reaching_state *block_reaching;
   unsigned block_count;

   /* Sizes of the full/half files for bounds checking. */
   unsigned full_size, half_size;

   /* If true, half regs share the full file (merged register file). */
   bool merged_regs;

   bool failed;
};
|
|
|
|
static void
|
|
validate_error(struct ra_val_ctx *ctx, const char *condstr)
|
|
{
|
|
fprintf(stderr, "ra validation fail: %s\n", condstr);
|
|
fprintf(stderr, " -> for instruction: ");
|
|
ir3_print_instr(ctx->current_instr);
|
|
abort();
|
|
}
|
|
|
|
/* Abort (via validate_error) when `cond` is false, reporting the
 * stringified condition and the current instruction.
 */
#define validate_assert(ctx, cond)                                             \
   do {                                                                        \
      if (!(cond)) {                                                           \
         validate_error(ctx, #cond);                                           \
      }                                                                        \
   } while (0)
|
|
|
|
static unsigned
|
|
get_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg)
|
|
{
|
|
if (reg->flags & IR3_REG_SHARED)
|
|
return RA_SHARED_SIZE;
|
|
else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
|
|
return ctx->full_size;
|
|
else
|
|
return ctx->half_size;
|
|
}
|
|
|
|
/* Validate simple things, like the registers being in-bounds. This way we
|
|
* don't have to worry about out-of-bounds accesses later.
|
|
*/
|
|
|
|
/* Check per-instruction invariants that don't need dataflow: every dst/src
 * must fit inside its register file, and tied dst/src pairs must be
 * allocated to the same register number.
 */
static void
validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   /* Remember the instruction so validate_error() can print it. */
   ctx->current_instr = instr;
   ra_foreach_dst (dst, instr) {
      /* One past the last subregister written by this dst. */
      unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst);
      validate_assert(ctx, dst_max <= get_file_size(ctx, dst));
      if (dst->tied)
         validate_assert(ctx, ra_reg_get_num(dst) == ra_reg_get_num(dst->tied));
   }

   ra_foreach_src (src, instr) {
      unsigned src_max = ra_reg_get_physreg(src) + reg_size(src);
      validate_assert(ctx, src_max <= get_file_size(ctx, src));
   }
}
|
|
|
|
/* This is the lattice operator. */
|
|
static bool
|
|
merge_reg(struct reg_state *dst, const struct reg_state *src)
|
|
{
|
|
if (dst->def == UNKNOWN) {
|
|
*dst = *src;
|
|
return src->def != UNKNOWN;
|
|
} else if (dst->def == OVERDEF) {
|
|
return false;
|
|
} else {
|
|
if (src->def == UNKNOWN)
|
|
return false;
|
|
else if (src->def == OVERDEF) {
|
|
*dst = *src;
|
|
return true;
|
|
} else {
|
|
if (dst->def != src->def || dst->offset != src->offset) {
|
|
dst->def = OVERDEF;
|
|
dst->offset = 0;
|
|
return true;
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static bool
|
|
merge_file(struct file_state *dst, const struct file_state *src, unsigned size)
|
|
{
|
|
bool progress = false;
|
|
for (unsigned i = 0; i < size; i++)
|
|
progress |= merge_reg(&dst->regs[i], &src->regs[i]);
|
|
return progress;
|
|
}
|
|
|
|
static bool
|
|
merge_state(struct ra_val_ctx *ctx, struct reaching_state *dst,
|
|
const struct reaching_state *src)
|
|
{
|
|
bool progress = false;
|
|
progress |= merge_file(&dst->full, &src->full, ctx->full_size);
|
|
progress |= merge_file(&dst->half, &src->half, ctx->half_size);
|
|
return progress;
|
|
}
|
|
|
|
/* Merge only the shared file. This is applied along physical successor
 * edges in propagate_block(), while the full/half files follow logical
 * successors via merge_state().
 */
static bool
merge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst,
                     const struct reaching_state *src)
{
   return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);
}
|
|
|
|
static struct file_state *
|
|
ra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg)
|
|
{
|
|
if (reg->flags & IR3_REG_SHARED)
|
|
return &ctx->reaching.shared;
|
|
else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
|
|
return &ctx->reaching.full;
|
|
else
|
|
return &ctx->reaching.half;
|
|
}
|
|
|
|
static void
|
|
propagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
|
|
{
|
|
ra_foreach_dst (dst, instr) {
|
|
struct file_state *file = ra_val_get_file(ctx, dst);
|
|
physreg_t physreg = ra_reg_get_physreg(dst);
|
|
for (unsigned i = 0; i < reg_size(dst); i++) {
|
|
file->regs[physreg + i] = (struct reg_state){
|
|
.def = dst,
|
|
.offset = i,
|
|
};
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
propagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split)
|
|
{
|
|
struct ir3_register *dst = split->dsts[0];
|
|
struct ir3_register *src = split->srcs[0];
|
|
physreg_t dst_physreg = ra_reg_get_physreg(dst);
|
|
physreg_t src_physreg = ra_reg_get_physreg(src);
|
|
struct file_state *file = ra_val_get_file(ctx, dst);
|
|
|
|
unsigned offset = split->split.off * reg_elem_size(src);
|
|
for (unsigned i = 0; i < reg_elem_size(src); i++) {
|
|
file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i];
|
|
}
|
|
}
|
|
|
|
/* Transfer function for a collect: treated as a parallel copy of all
 * sources into the destination vector.
 */
static void
propagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect)
{
   struct ir3_register *dst = collect->dsts[0];
   physreg_t dst_physreg = ra_reg_get_physreg(dst);
   struct file_state *file = ra_val_get_file(ctx, dst);

   unsigned size = reg_size(dst);
   /* Buffer all source facts before writing any destination registers, so
    * that a source overlapping the destination isn't clobbered mid-update.
    */
   struct reg_state srcs[size];

   for (unsigned i = 0; i < collect->srcs_count; i++) {
      struct ir3_register *src = collect->srcs[i];
      unsigned dst_offset = i * reg_elem_size(dst);
      for (unsigned j = 0; j < reg_elem_size(dst); j++) {
         if (!ra_reg_is_src(src)) {
            /* Non-SSA source (e.g. immed/const): record the collect's own
             * dst as the originating def for this subregister.
             */
            srcs[dst_offset + j] = (struct reg_state){
               .def = dst,
               .offset = dst_offset + j,
            };
         } else {
            physreg_t src_physreg = ra_reg_get_physreg(src);
            srcs[dst_offset + j] = file->regs[src_physreg + j];
         }
      }
   }

   /* Now commit the buffered facts to the destination registers. */
   for (unsigned i = 0; i < size; i++)
      file->regs[dst_physreg + i] = srcs[i];
}
|
|
|
|
/* Transfer function for a parallelcopy: all copies happen simultaneously,
 * so read every source's facts before writing any destination.
 */
static void
propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy)
{
   /* Total number of subregisters moved by this parallelcopy. */
   unsigned size = 0;
   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
      size += reg_size(pcopy->srcs[i]);
   }

   /* Staging buffer holding the pre-copy facts for every subregister. */
   struct reg_state srcs[size];

   /* Phase 1: gather. Sources and destinations are paired by index. */
   unsigned offset = 0;
   for (unsigned i = 0; i < pcopy->srcs_count; i++) {
      struct ir3_register *dst = pcopy->dsts[i];
      struct ir3_register *src = pcopy->srcs[i];
      struct file_state *file = ra_val_get_file(ctx, dst);

      for (unsigned j = 0; j < reg_size(dst); j++) {
         if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) {
            /* Immediates/constants have no physreg; the dst itself becomes
             * the originating def for this subregister.
             */
            srcs[offset + j] = (struct reg_state){
               .def = dst,
               .offset = j,
            };
         } else {
            physreg_t src_physreg = ra_reg_get_physreg(src);
            srcs[offset + j] = file->regs[src_physreg + j];
         }
      }

      offset += reg_size(dst);
   }
   assert(offset == size);

   /* Phase 2: scatter the buffered facts to the destinations. */
   offset = 0;
   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
      struct ir3_register *dst = pcopy->dsts[i];
      physreg_t dst_physreg = ra_reg_get_physreg(dst);
      struct file_state *file = ra_val_get_file(ctx, dst);

      for (unsigned j = 0; j < reg_size(dst); j++)
         file->regs[dst_physreg + j] = srcs[offset + j];

      offset += reg_size(dst);
   }
   assert(offset == size);
}
|
|
|
|
static void
|
|
propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
|
|
{
|
|
if (instr->opc == OPC_META_SPLIT)
|
|
propagate_split(ctx, instr);
|
|
else if (instr->opc == OPC_META_COLLECT)
|
|
propagate_collect(ctx, instr);
|
|
else if (instr->opc == OPC_META_PARALLEL_COPY)
|
|
propagate_parallelcopy(ctx, instr);
|
|
else
|
|
propagate_normal_instr(ctx, instr);
|
|
}
|
|
|
|
static bool
|
|
propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)
|
|
{
|
|
ctx->reaching = ctx->block_reaching[block->index];
|
|
|
|
foreach_instr (instr, &block->instr_list) {
|
|
propagate_instr(ctx, instr);
|
|
}
|
|
|
|
bool progress = false;
|
|
for (unsigned i = 0; i < 2; i++) {
|
|
struct ir3_block *succ = block->successors[i];
|
|
if (!succ)
|
|
continue;
|
|
progress |=
|
|
merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching);
|
|
}
|
|
for (unsigned i = 0; i < 2; i++) {
|
|
struct ir3_block *succ = block->physical_successors[i];
|
|
if (!succ)
|
|
continue;
|
|
progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index],
|
|
&ctx->reaching);
|
|
}
|
|
return progress;
|
|
}
|
|
|
|
/* Look through split/collect/parallelcopy "transfer" instructions to find
 * the original (def, offset) that a value ultimately comes from, so that
 * sources can be compared against the propagated per-physreg state.
 * Mutates *state in place; stops at a non-transfer def or an immed/const.
 */
static void
chase_definition(struct reg_state *state)
{
   while (true) {
      struct ir3_instruction *instr = state->def->instr;
      switch (instr->opc) {
      case OPC_META_SPLIT: {
         /* A split reads element split.off of its source, so shift our
          * offset by that many subregisters into the source def.
          */
         struct ir3_register *new_def = instr->srcs[0]->def;
         unsigned offset = instr->split.off * reg_elem_size(new_def);
         *state = (struct reg_state){
            .def = new_def,
            .offset = state->offset + offset,
         };
         break;
      }
      case OPC_META_COLLECT: {
         /* Map our offset within the collect's dst to the source element
          * it came from, and the offset within that element.
          */
         unsigned src_idx = state->offset / reg_elem_size(state->def);
         unsigned src_offset = state->offset % reg_elem_size(state->def);
         struct ir3_register *new_def = instr->srcs[src_idx]->def;
         if (new_def) {
            *state = (struct reg_state){
               .def = new_def,
               .offset = src_offset,
            };
         } else {
            /* Bail on immed/const */
            return;
         }
         break;
      }
      case OPC_META_PARALLEL_COPY: {
         /* Find which dst of the pcopy we came from; its paired src is the
          * next link in the chain (offset is unchanged by a pcopy).
          */
         unsigned dst_idx = ~0;
         for (unsigned i = 0; i < instr->dsts_count; i++) {
            if (instr->dsts[i] == state->def) {
               dst_idx = i;
               break;
            }
         }
         assert(dst_idx != ~0);

         struct ir3_register *new_def = instr->srcs[dst_idx]->def;
         if (new_def) {
            state->def = new_def;
         } else {
            /* Bail on immed/const */
            return;
         }
         break;
      }
      default:
         /* Reached a real (non-transfer) definition. */
         return;
      }
   }
}
|
|
|
|
static void
|
|
dump_reg_state(struct reg_state *state)
|
|
{
|
|
if (state->def == UNDEF) {
|
|
fprintf(stderr, "no reaching definition");
|
|
} else if (state->def == OVERDEF) {
|
|
fprintf(stderr,
|
|
"more than one reaching definition or partial definition");
|
|
} else {
|
|
/* The analysis should always remove UNKNOWN eventually. */
|
|
assert(state->def != UNKNOWN);
|
|
|
|
fprintf(stderr, "ssa_%u:%u(%sr%u.%c) + %u", state->def->instr->serialno,
|
|
state->def->name, (state->def->flags & IR3_REG_HALF) ? "h" : "",
|
|
state->def->num / 4, "xyzw"[state->def->num % 4],
|
|
state -> offset);
|
|
}
|
|
}
|
|
|
|
static void
|
|
check_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr,
|
|
struct ir3_register *src)
|
|
{
|
|
struct file_state *file = ra_val_get_file(ctx, src);
|
|
physreg_t physreg = ra_reg_get_physreg(src);
|
|
for (unsigned i = 0; i < reg_size(src); i++) {
|
|
struct reg_state expected = (struct reg_state){
|
|
.def = src->def,
|
|
.offset = i,
|
|
};
|
|
chase_definition(&expected);
|
|
|
|
struct reg_state actual = file->regs[physreg + i];
|
|
|
|
if (expected.def != actual.def || expected.offset != actual.offset) {
|
|
fprintf(
|
|
stderr,
|
|
"ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n",
|
|
src->def->instr->serialno, src->def->name, i);
|
|
fprintf(stderr, "expected: ");
|
|
dump_reg_state(&expected);
|
|
fprintf(stderr, "\n");
|
|
fprintf(stderr, "actual: ");
|
|
dump_reg_state(&actual);
|
|
fprintf(stderr, "\n");
|
|
fprintf(stderr, "-> for instruction: ");
|
|
ir3_print_instr(instr);
|
|
ctx->failed = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
check_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
|
|
{
|
|
if (instr->opc == OPC_META_SPLIT || instr->opc == OPC_META_COLLECT ||
|
|
instr->opc == OPC_META_PARALLEL_COPY || instr->opc == OPC_META_PHI) {
|
|
return;
|
|
}
|
|
|
|
ra_foreach_src (src, instr) {
|
|
check_reaching_src(ctx, instr, src);
|
|
}
|
|
}
|
|
|
|
/* Walk a block checking each instruction's sources against the propagated
 * state, then check successor phi sources against this block's exit state
 * (a phi reads its per-predecessor source at the end of that predecessor,
 * not at the phi itself).
 */
static void
check_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block)
{
   ctx->reaching = ctx->block_reaching[block->index];

   foreach_instr (instr, &block->instr_list) {
      check_reaching_instr(ctx, instr);
      propagate_instr(ctx, instr);
   }

   for (unsigned i = 0; i < 2; i++) {
      struct ir3_block *succ = block->successors[i];
      if (!succ)
         continue;

      /* Which of succ's phi sources corresponds to this predecessor. */
      unsigned pred_idx = ir3_block_get_pred_index(succ, block);
      foreach_instr (instr, &succ->instr_list) {
         /* Phis are clustered at the top of the block. */
         if (instr->opc != OPC_META_PHI)
            break;
         /* A NULL def means an undef phi source; nothing to check. */
         if (instr->srcs[pred_idx]->def)
            check_reaching_src(ctx, instr, instr->srcs[pred_idx]);
      }
   }
}
|
|
|
|
static void
|
|
check_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir)
|
|
{
|
|
ctx->block_reaching =
|
|
rzalloc_array(ctx, struct reaching_state, ctx->block_count);
|
|
|
|
struct reaching_state *start = &ctx->block_reaching[0];
|
|
for (unsigned i = 0; i < ctx->full_size; i++)
|
|
start->full.regs[i].def = UNDEF;
|
|
for (unsigned i = 0; i < ctx->half_size; i++)
|
|
start->half.regs[i].def = UNDEF;
|
|
for (unsigned i = 0; i < RA_SHARED_SIZE; i++)
|
|
start->shared.regs[i].def = UNDEF;
|
|
|
|
bool progress;
|
|
do {
|
|
progress = false;
|
|
foreach_block (block, &ir->block_list) {
|
|
progress |= propagate_block(ctx, block);
|
|
}
|
|
} while (progress);
|
|
|
|
foreach_block (block, &ir->block_list) {
|
|
check_reaching_block(ctx, block);
|
|
}
|
|
|
|
if (ctx->failed) {
|
|
fprintf(stderr, "failing shader:\n");
|
|
ir3_print(ir);
|
|
abort();
|
|
}
|
|
}
|
|
|
|
/* Entry point: validate the register allocation of `v`. First runs cheap
 * bounds/tied-register checks on every instruction, then the full
 * reaching-definitions analysis. Aborts on failure; compiled out entirely
 * in NDEBUG builds.
 */
void
ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
                unsigned half_size, unsigned block_count)
{
#ifdef NDEBUG
#define VALIDATE 0
#else
#define VALIDATE 1
#endif

   if (!VALIDATE)
      return;

   /* ctx owns all allocations made during validation (via ralloc). */
   struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx);
   ctx->merged_regs = v->mergedregs;
   ctx->full_size = full_size;
   ctx->half_size = half_size;
   ctx->block_count = block_count;

   /* Bounds-check everything first so the dataflow pass can index the
    * register files without worrying about out-of-bounds accesses.
    */
   foreach_block (block, &v->ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         validate_simple(ctx, instr);
      }
   }

   check_reaching_defs(ctx, v->ir);

   ralloc_free(ctx);
}
|