mesa/src/freedreno/ir3/ir3_array_to_ssa.c

/*
 * Copyright (C) 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
/* This pass lowers array accesses to SSA.
 *
 * After this pass, instructions writing arrays implicitly read the contents of
 * the array defined in instr->dsts[0]->def (possibly a phi node), perform the
 * operation, and store to instr->dsts[0].
 *
 * This makes arrays appear like "normal" SSA values, even if the false
 * dependencies mean that they always stay in CSSA form (i.e. they can be taken
 * out of SSA with no copies). While they hopefully shouldn't induce copies in
 * most cases, we can't make that guarantee while also splitting spilling from
 * RA and guaranteeing that a certain number of registers is used, so we have
 * to insert the phi nodes to be able to know when copying should happen.
 *
 * The implementation is based on the idea in "Simple and Efficient
 * Construction of Static Single Assignment Form" of scanning backwards to find
 * the definition. However, since we're not doing this on the fly, we can
 * simplify things a little by doing a pre-pass to get the last definition of
 * each array in each block. Then we optimize trivial phis in a separate pass,
 * "on the fly", so that we don't have to rewrite (and keep track of) users.
 */
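/* As an illustration (not actual ir3 syntax): if an array is written on both
 * sides of an if/else, this pass inserts an array phi at the join block, and
 * any later access to the array in the join block reaches its contents through
 * that phi. This is what lets later passes (RA in particular) see where the
 * array's contents must stay live and where copies would have to go.
 */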
#include <stdlib.h>
#include "ir3.h"
struct array_state {
   struct ir3_register *live_in_definition;
   struct ir3_register *live_out_definition;
   bool constructed;
   bool optimized;
};

struct array_ctx {
   struct array_state *states;
   struct ir3 *ir;
   unsigned array_count;
};
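
/* Per-array, per-block state lives in a flat table indexed by
 * (block->index, array id).
 */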
static struct array_state *
get_state(struct array_ctx *ctx, struct ir3_block *block, unsigned id)
{
   return &ctx->states[ctx->array_count * block->index + id];
}
static struct ir3_register *read_value_beginning(struct array_ctx *ctx,
                                                 struct ir3_block *block,
                                                 struct ir3_array *arr);
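
/* Return the definition of the array that is live out of a block: the last
 * write inside the block if there is one (recorded by the pre-pass), and
 * otherwise whatever is live in at the top of the block.
 */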
static struct ir3_register *
read_value_end(struct array_ctx *ctx, struct ir3_block *block,
               struct ir3_array *arr)
{
   struct array_state *state = get_state(ctx, block, arr->id);
   if (state->live_out_definition)
      return state->live_out_definition;

   state->live_out_definition = read_value_beginning(ctx, block, arr);
   return state->live_out_definition;
}

/* Roughly equivalent to readValueRecursive from the paper: */
static struct ir3_register *
read_value_beginning(struct array_ctx *ctx, struct ir3_block *block,
                     struct ir3_array *arr)
{
   struct array_state *state = get_state(ctx, block, arr->id);

   if (state->constructed)
      return state->live_in_definition;

   if (block->predecessors_count == 0) {
      state->constructed = true;
      return NULL;
   }

   if (block->predecessors_count == 1) {
      state->live_in_definition =
         read_value_end(ctx, block->predecessors[0], arr);
      state->constructed = true;
      return state->live_in_definition;
   }

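   /* Multiple predecessors: conservatively create a phi and fill its sources
    * with each predecessor's live-out definition. The list_del()/list_add()
    * pair moves the freshly created phi to the front of the block, where phi
    * nodes are expected to live. Note that the state is marked constructed
    * (and the phi recorded as the live-in definition) before the predecessors
    * are read, which is what breaks cycles through loop back-edges.
    */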
   unsigned flags = IR3_REG_ARRAY | (arr->half ? IR3_REG_HALF : 0);
   struct ir3_instruction *phi =
      ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count);
   list_del(&phi->node);
   list_add(&phi->node, &block->instr_list);

   struct ir3_register *dst = __ssa_dst(phi);
   dst->flags |= flags;
   dst->array.id = arr->id;
   dst->size = arr->length;

   state->live_in_definition = phi->dsts[0];
   state->constructed = true;

   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_register *src =
         read_value_end(ctx, block->predecessors[i], arr);
      struct ir3_register *src_reg;
      if (src) {
         src_reg = __ssa_src(phi, src->instr, flags);
      } else {
         src_reg = ir3_src_create(phi, INVALID_REG, flags | IR3_REG_SSA);
      }
      src_reg->array.id = arr->id;
      src_reg->size = arr->length;
   }

   return phi->dsts[0];
}

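/* Roughly the tryRemoveTrivialPhi step from the paper, run as a separate pass:
 * if every source of a phi is either the phi itself or one unique definition,
 * the phi is trivial and can be replaced by that definition. phi->data records
 * the replacement (or the phi's own dst if it is not trivial), which
 * lookup_value() consults later.
 */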
static struct ir3_register *
remove_trivial_phi(struct ir3_instruction *phi)
{
   /* Break cycles */
   if (phi->data)
      return phi->data;

   phi->data = phi->dsts[0];

   struct ir3_register *unique_def = NULL;
   bool unique = true;

   for (unsigned i = 0; i < phi->block->predecessors_count; i++) {
      struct ir3_register *src = phi->srcs[i];

      /* If there are any undef sources, then the remaining sources may not
       * dominate the phi node, even if they are all equal. So we need to
       * bail out in this case.
       *
       * This seems to be a bug in the original paper.
       */
      if (!src->def) {
         unique = false;
         break;
      }

      struct ir3_instruction *src_instr = src->def->instr;

      /* phi sources which point to the phi itself don't count for
       * figuring out if the phi is trivial
       */
      if (src_instr == phi)
         continue;

      if (src_instr->opc == OPC_META_PHI) {
         src->def = remove_trivial_phi(src->def->instr);
      }

      if (unique_def) {
         if (unique_def != src->def) {
            unique = false;
            break;
         }
      } else {
         unique_def = src->def;
      }
   }

   if (unique) {
      phi->data = unique_def;
      return unique_def;
   } else {
      return phi->dsts[0];
   }
}

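/* Follow a definition through phi->data: if the value was defined by a phi
 * that turned out to be trivial, return its replacement, otherwise return the
 * definition unchanged.
 */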
static struct ir3_register *
lookup_value(struct ir3_register *reg)
{
   if (reg->instr->opc == OPC_META_PHI)
      return reg->instr->data;
   return reg;
}

static struct ir3_register *
lookup_live_in(struct array_ctx *ctx, struct ir3_block *block, unsigned id)
{
   struct array_state *state = get_state(ctx, block, id);
   if (state->live_in_definition)
      return lookup_value(state->live_in_definition);

   return NULL;
}

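/* The pass proper: number the blocks, record each block's last array write
 * (its live-out definition), construct phis for every array access that needs
 * a reaching definition, simplify the phis that turn out to be trivial, and
 * finally rewrite array srcs/dsts to point at their SSA definitions.
 */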
bool
ir3_array_to_ssa(struct ir3 *ir)
{
   struct array_ctx ctx = {};

   foreach_array (array, &ir->array_list) {
      ctx.array_count = MAX2(ctx.array_count, array->id + 1);
   }

   if (ctx.array_count == 0)
      return false;

   unsigned i = 0;
   foreach_block (block, &ir->block_list) {
      block->index = i++;
   }

   ctx.ir = ir;
   ctx.states = calloc(ctx.array_count * i, sizeof(struct array_state));

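   /* Pre-pass: record the last write of each array in each block. If a block
    * writes an array at all, that write is the array's live-out definition.
    */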
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         foreach_dst (dst, instr) {
            if (dst->flags & IR3_REG_ARRAY) {
               struct array_state *state =
                  get_state(&ctx, block, dst->array.id);
               state->live_out_definition = dst;
            }
         }
      }
   }

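   /* Make sure a reaching definition (possibly a newly constructed phi) exists
    * for every array access that has no earlier definition in its own block.
    */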
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc == OPC_META_PHI)
            continue;

         foreach_dst (reg, instr) {
            if ((reg->flags & IR3_REG_ARRAY) && !reg->tied) {
               struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);

               /* Construct any phi nodes necessary to read this value */
               read_value_beginning(&ctx, block, arr);
            }
         }
         foreach_src (reg, instr) {
            if ((reg->flags & IR3_REG_ARRAY) && !reg->def) {
               struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);

               /* Construct any phi nodes necessary to read this value */
               read_value_beginning(&ctx, block, arr);
            }
         }
      }
   }

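   /* Simplify the phis we just constructed. Phis always sit at the start of a
    * block, so we can stop at the first non-phi instruction.
    */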
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->opc == OPC_META_PHI)
            remove_trivial_phi(instr);
         else
            break;
      }
   }

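   /* Rewrite array srcs/dsts to point at their final SSA definitions and
    * delete the array phis that were replaced by another definition.
    */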
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->opc == OPC_META_PHI) {
            if (!(instr->dsts[0]->flags & IR3_REG_ARRAY))
               continue;
            if (instr->data != instr->dsts[0]) {
               list_del(&instr->node);
               continue;
            }

            /* Point the surviving phi's sources at their final definitions,
             * leaving undef (NULL-def) sources alone.
             */
            for (unsigned i = 0; i < instr->srcs_count; i++) {
               if (instr->srcs[i]->def)
                  instr->srcs[i]->def = lookup_value(instr->srcs[i]->def);
            }
         } else {
            foreach_dst (reg, instr) {
               if ((reg->flags & IR3_REG_ARRAY)) {
                  if (!reg->tied) {
                     struct ir3_register *def =
                        lookup_live_in(&ctx, block, reg->array.id);
                     if (def)
                        ir3_reg_set_last_array(instr, reg, def);
                  }
                  reg->flags |= IR3_REG_SSA;
               }
            }
            foreach_src (reg, instr) {
               if ((reg->flags & IR3_REG_ARRAY)) {
                  /* It is assumed that before calling
                   * ir3_array_to_ssa(), reg->def was set to the
                   * previous writer of the array within the current
                   * block or NULL if none.
                   */
                  if (!reg->def) {
                     reg->def = lookup_live_in(&ctx, block, reg->array.id);
                  }
                  reg->flags |= IR3_REG_SSA;
               }
            }
         }
      }
   }

   free(ctx.states);
   return true;
}