/* * Copyright (C) 2021 Valve Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "util/ralloc.h" #include "ir3_ra.h" #include "ir3_shader.h" /* This file implements a validation pass for register allocation. We check * that the assignment of SSA values to registers is "valid", in the sense * that each original definition reaches all of its uses without being * clobbered by something else. * * The validation is a forward dataflow analysis. The state at each point * consists of, for each physical register, the SSA value occupying it, or a * few special values: * * - "unknown" is set initially, before the dataflow analysis assigns it a * value. This is the lattice bottom. * - Values at the start get "undef", which acts like a special SSA value that * indicates it is never written. * - "overdefined" registers are set to more than one value, depending on * which path you take to get to the spot. This is the lattice top. * * Overdefined is necessary to distinguish because in some programs, like this * simple example, it's perfectly normal and allowed: * * if (...) { * mov.u32u32 ssa_1(r1.x), ... * ... * } else { * mov.u32u32 ssa_2(r1.x), ... * ... * } * // r1.x is overdefined here! * * However, if an ssa value after the if is accidentally assigned to r1.x, we * need to remember that it's invalid to catch the mistake. Overdef has to be * distinguished from undef so that the state forms a valid lattice to * guarantee that the analysis always terminates. We could avoid relying on * overdef by using liveness analysis, but not relying on liveness has the * benefit that we can catch bugs in liveness analysis too. * * One tricky thing we have to handle is the coalescing of splits/collects, * which means that multiple SSA values can occupy a register at the same * time. While we could use the same merge set indices that RA uses, again * that would rely on the merge set calculation being correct which we don't * want to. Instead we treat splits/collects as transfer instructions, similar * to the parallelcopy instructions inserted by RA, and have them copy their * sources to their destinations. This means that each physreg must carry the * SSA def assigned to it plus an offset into that definition, and when * validating sources we must look through splits/collects to find the * "original" source for each subregister. */ #define UNKNOWN ((struct ir3_register *)NULL) #define UNDEF ((struct ir3_register *)(uintptr_t)1) #define OVERDEF ((struct ir3_register *)(uintptr_t)2) struct reg_state { struct ir3_register *def; unsigned offset; }; struct file_state { struct reg_state regs[RA_MAX_FILE_SIZE]; }; struct reaching_state { struct file_state half, full, shared; }; struct ra_val_ctx { struct ir3_instruction *current_instr; struct reaching_state reaching; struct reaching_state *block_reaching; unsigned block_count; unsigned full_size, half_size; bool merged_regs; bool failed; }; static void validate_error(struct ra_val_ctx *ctx, const char *condstr) { fprintf(stderr, "ra validation fail: %s\n", condstr); fprintf(stderr, " -> for instruction: "); ir3_print_instr(ctx->current_instr); abort(); } #define validate_assert(ctx, cond) \ do { \ if (!(cond)) { \ validate_error(ctx, #cond); \ } \ } while (0) static unsigned get_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg) { if (reg->flags & IR3_REG_SHARED) return RA_SHARED_SIZE; else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF)) return ctx->full_size; else return ctx->half_size; } /* Validate simple things, like the registers being in-bounds. This way we * don't have to worry about out-of-bounds accesses later. */ static void validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr) { ctx->current_instr = instr; ra_foreach_dst (dst, instr) { unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst); validate_assert(ctx, dst_max <= get_file_size(ctx, dst)); if (dst->tied) validate_assert(ctx, ra_reg_get_num(dst) == ra_reg_get_num(dst->tied)); } ra_foreach_src (src, instr) { unsigned src_max = ra_reg_get_physreg(src) + reg_size(src); validate_assert(ctx, src_max <= get_file_size(ctx, src)); } } /* This is the lattice operator. */ static bool merge_reg(struct reg_state *dst, const struct reg_state *src) { if (dst->def == UNKNOWN) { *dst = *src; return src->def != UNKNOWN; } else if (dst->def == OVERDEF) { return false; } else { if (src->def == UNKNOWN) return false; else if (src->def == OVERDEF) { *dst = *src; return true; } else { if (dst->def != src->def || dst->offset != src->offset) { dst->def = OVERDEF; dst->offset = 0; return true; } else { return false; } } } } static bool merge_file(struct file_state *dst, const struct file_state *src, unsigned size) { bool progress = false; for (unsigned i = 0; i < size; i++) progress |= merge_reg(&dst->regs[i], &src->regs[i]); return progress; } static bool merge_state(struct ra_val_ctx *ctx, struct reaching_state *dst, const struct reaching_state *src) { bool progress = false; progress |= merge_file(&dst->full, &src->full, ctx->full_size); progress |= merge_file(&dst->half, &src->half, ctx->half_size); return progress; } static bool merge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst, const struct reaching_state *src) { return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE); } static struct file_state * ra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg) { if (reg->flags & IR3_REG_SHARED) return &ctx->reaching.shared; else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF)) return &ctx->reaching.full; else return &ctx->reaching.half; } static void propagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr) { ra_foreach_dst (dst, instr) { struct file_state *file = ra_val_get_file(ctx, dst); physreg_t physreg = ra_reg_get_physreg(dst); for (unsigned i = 0; i < reg_size(dst); i++) { file->regs[physreg + i] = (struct reg_state){ .def = dst, .offset = i, }; } } } static void propagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split) { struct ir3_register *dst = split->dsts[0]; struct ir3_register *src = split->srcs[0]; physreg_t dst_physreg = ra_reg_get_physreg(dst); physreg_t src_physreg = ra_reg_get_physreg(src); struct file_state *file = ra_val_get_file(ctx, dst); unsigned offset = split->split.off * reg_elem_size(src); for (unsigned i = 0; i < reg_elem_size(src); i++) { file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i]; } } static void propagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect) { struct ir3_register *dst = collect->dsts[0]; physreg_t dst_physreg = ra_reg_get_physreg(dst); struct file_state *file = ra_val_get_file(ctx, dst); unsigned size = reg_size(dst); struct reg_state srcs[size]; for (unsigned i = 0; i < collect->srcs_count; i++) { struct ir3_register *src = collect->srcs[i]; unsigned dst_offset = i * reg_elem_size(dst); for (unsigned j = 0; j < reg_elem_size(dst); j++) { if (!ra_reg_is_src(src)) { srcs[dst_offset + j] = (struct reg_state){ .def = dst, .offset = dst_offset + j, }; } else { physreg_t src_physreg = ra_reg_get_physreg(src); srcs[dst_offset + j] = file->regs[src_physreg + j]; } } } for (unsigned i = 0; i < size; i++) file->regs[dst_physreg + i] = srcs[i]; } static void propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy) { unsigned size = 0; for (unsigned i = 0; i < pcopy->dsts_count; i++) { size += reg_size(pcopy->srcs[i]); } struct reg_state srcs[size]; unsigned offset = 0; for (unsigned i = 0; i < pcopy->srcs_count; i++) { struct ir3_register *dst = pcopy->dsts[i]; struct ir3_register *src = pcopy->srcs[i]; struct file_state *file = ra_val_get_file(ctx, dst); for (unsigned j = 0; j < reg_size(dst); j++) { if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) { srcs[offset + j] = (struct reg_state){ .def = dst, .offset = j, }; } else { physreg_t src_physreg = ra_reg_get_physreg(src); srcs[offset + j] = file->regs[src_physreg + j]; } } offset += reg_size(dst); } assert(offset == size); offset = 0; for (unsigned i = 0; i < pcopy->dsts_count; i++) { struct ir3_register *dst = pcopy->dsts[i]; physreg_t dst_physreg = ra_reg_get_physreg(dst); struct file_state *file = ra_val_get_file(ctx, dst); for (unsigned j = 0; j < reg_size(dst); j++) file->regs[dst_physreg + j] = srcs[offset + j]; offset += reg_size(dst); } assert(offset == size); } static void propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr) { if (instr->opc == OPC_META_SPLIT) propagate_split(ctx, instr); else if (instr->opc == OPC_META_COLLECT) propagate_collect(ctx, instr); else if (instr->opc == OPC_META_PARALLEL_COPY) propagate_parallelcopy(ctx, instr); else propagate_normal_instr(ctx, instr); } static bool propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block) { ctx->reaching = ctx->block_reaching[block->index]; foreach_instr (instr, &block->instr_list) { propagate_instr(ctx, instr); } bool progress = false; for (unsigned i = 0; i < 2; i++) { struct ir3_block *succ = block->successors[i]; if (!succ) continue; progress |= merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching); } for (unsigned i = 0; i < 2; i++) { struct ir3_block *succ = block->physical_successors[i]; if (!succ) continue; progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index], &ctx->reaching); } return progress; } static void chase_definition(struct reg_state *state) { while (true) { struct ir3_instruction *instr = state->def->instr; switch (instr->opc) { case OPC_META_SPLIT: { struct ir3_register *new_def = instr->srcs[0]->def; unsigned offset = instr->split.off * reg_elem_size(new_def); *state = (struct reg_state){ .def = new_def, .offset = state->offset + offset, }; break; } case OPC_META_COLLECT: { unsigned src_idx = state->offset / reg_elem_size(state->def); unsigned src_offset = state->offset % reg_elem_size(state->def); struct ir3_register *new_def = instr->srcs[src_idx]->def; if (new_def) { *state = (struct reg_state){ .def = new_def, .offset = src_offset, }; } else { /* Bail on immed/const */ return; } break; } case OPC_META_PARALLEL_COPY: { unsigned dst_idx = ~0; for (unsigned i = 0; i < instr->dsts_count; i++) { if (instr->dsts[i] == state->def) { dst_idx = i; break; } } assert(dst_idx != ~0); struct ir3_register *new_def = instr->srcs[dst_idx]->def; if (new_def) { state->def = new_def; } else { /* Bail on immed/const */ return; } break; } default: return; } } } static void dump_reg_state(struct reg_state *state) { if (state->def == UNDEF) { fprintf(stderr, "no reaching definition"); } else if (state->def == OVERDEF) { fprintf(stderr, "more than one reaching definition or partial definition"); } else { /* The analysis should always remove UNKNOWN eventually. */ assert(state->def != UNKNOWN); fprintf(stderr, "ssa_%u:%u(%sr%u.%c) + %u", state->def->instr->serialno, state->def->name, (state->def->flags & IR3_REG_HALF) ? "h" : "", state->def->num / 4, "xyzw"[state->def->num % 4], state -> offset); } } static void check_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr, struct ir3_register *src) { struct file_state *file = ra_val_get_file(ctx, src); physreg_t physreg = ra_reg_get_physreg(src); for (unsigned i = 0; i < reg_size(src); i++) { struct reg_state expected = (struct reg_state){ .def = src->def, .offset = i, }; chase_definition(&expected); struct reg_state actual = file->regs[physreg + i]; if (expected.def != actual.def || expected.offset != actual.offset) { fprintf( stderr, "ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n", src->def->instr->serialno, src->def->name, i); fprintf(stderr, "expected: "); dump_reg_state(&expected); fprintf(stderr, "\n"); fprintf(stderr, "actual: "); dump_reg_state(&actual); fprintf(stderr, "\n"); fprintf(stderr, "-> for instruction: "); ir3_print_instr(instr); ctx->failed = true; } } } static void check_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr) { if (instr->opc == OPC_META_SPLIT || instr->opc == OPC_META_COLLECT || instr->opc == OPC_META_PARALLEL_COPY || instr->opc == OPC_META_PHI) { return; } ra_foreach_src (src, instr) { check_reaching_src(ctx, instr, src); } } static void check_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block) { ctx->reaching = ctx->block_reaching[block->index]; foreach_instr (instr, &block->instr_list) { check_reaching_instr(ctx, instr); propagate_instr(ctx, instr); } for (unsigned i = 0; i < 2; i++) { struct ir3_block *succ = block->successors[i]; if (!succ) continue; unsigned pred_idx = ir3_block_get_pred_index(succ, block); foreach_instr (instr, &succ->instr_list) { if (instr->opc != OPC_META_PHI) break; if (instr->srcs[pred_idx]->def) check_reaching_src(ctx, instr, instr->srcs[pred_idx]); } } } static void check_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir) { ctx->block_reaching = rzalloc_array(ctx, struct reaching_state, ctx->block_count); struct reaching_state *start = &ctx->block_reaching[0]; for (unsigned i = 0; i < ctx->full_size; i++) start->full.regs[i].def = UNDEF; for (unsigned i = 0; i < ctx->half_size; i++) start->half.regs[i].def = UNDEF; for (unsigned i = 0; i < RA_SHARED_SIZE; i++) start->shared.regs[i].def = UNDEF; bool progress; do { progress = false; foreach_block (block, &ir->block_list) { progress |= propagate_block(ctx, block); } } while (progress); foreach_block (block, &ir->block_list) { check_reaching_block(ctx, block); } if (ctx->failed) { fprintf(stderr, "failing shader:\n"); ir3_print(ir); abort(); } } void ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size, unsigned half_size, unsigned block_count) { #ifdef NDEBUG #define VALIDATE 0 #else #define VALIDATE 1 #endif if (!VALIDATE) return; struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx); ctx->merged_regs = v->mergedregs; ctx->full_size = full_size; ctx->half_size = half_size; ctx->block_count = block_count; foreach_block (block, &v->ir->block_list) { foreach_instr (instr, &block->instr_list) { validate_simple(ctx, instr); } } check_reaching_defs(ctx, v->ir); ralloc_free(ctx); }