/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */
|
|
|
|
#include "nir.h"
|
|
#include "nir_builder.h"
|
|
#include "nir_phi_builder.h"
|
|
#include "nir_vla.h"
|
|
|
|
struct regs_to_ssa_state {
|
|
nir_shader *shader;
|
|
|
|
struct nir_phi_builder_value **values;
|
|
};
|
|
|
|
static bool
|
|
rewrite_src(nir_src *src, void *_state)
|
|
{
|
|
struct regs_to_ssa_state *state = _state;
|
|
|
|
if (src->is_ssa)
|
|
return true;
|
|
|
|
nir_instr *instr = src->parent_instr;
|
|
nir_register *reg = src->reg.reg;
|
|
struct nir_phi_builder_value *value = state->values[reg->index];
|
|
if (!value)
|
|
return true;
|
|
|
|
nir_block *block;
|
|
if (instr->type == nir_instr_type_phi) {
|
|
nir_phi_src *phi_src = exec_node_data(nir_phi_src, src, src);
|
|
block = phi_src->pred;
|
|
} else {
|
|
block = instr->block;
|
|
}
|
|
|
|
nir_ssa_def *def = nir_phi_builder_value_get_block_def(value, block);
|
|
nir_instr_rewrite_src(instr, src, nir_src_for_ssa(def));
|
|
|
|
return true;
|
|
}
|
|
|
|
static void
|
|
rewrite_if_condition(nir_if *nif, struct regs_to_ssa_state *state)
|
|
{
|
|
if (nif->condition.is_ssa)
|
|
return;
|
|
|
|
nir_block *block = nir_cf_node_as_block(nir_cf_node_prev(&nif->cf_node));
|
|
nir_register *reg = nif->condition.reg.reg;
|
|
struct nir_phi_builder_value *value = state->values[reg->index];
|
|
if (!value)
|
|
return;
|
|
|
|
nir_ssa_def *def = nir_phi_builder_value_get_block_def(value, block);
|
|
nir_if_rewrite_condition(nif, nir_src_for_ssa(def));
|
|
}
|
|
|
|
static bool
|
|
rewrite_dest(nir_dest *dest, void *_state)
|
|
{
|
|
struct regs_to_ssa_state *state = _state;
|
|
|
|
if (dest->is_ssa)
|
|
return true;
|
|
|
|
nir_instr *instr = dest->reg.parent_instr;
|
|
nir_register *reg = dest->reg.reg;
|
|
struct nir_phi_builder_value *value = state->values[reg->index];
|
|
if (!value)
|
|
return true;
|
|
|
|
list_del(&dest->reg.def_link);
|
|
nir_ssa_dest_init(instr, dest, reg->num_components,
|
|
reg->bit_size, NULL);
|
|
|
|
nir_phi_builder_value_set_block_def(value, instr->block, &dest->ssa);
|
|
|
|
return true;
|
|
}
|
|
|
|
static void
|
|
rewrite_alu_instr(nir_alu_instr *alu, struct regs_to_ssa_state *state)
|
|
{
|
|
nir_foreach_src(&alu->instr, rewrite_src, state);
|
|
|
|
if (alu->dest.dest.is_ssa)
|
|
return;
|
|
|
|
nir_register *reg = alu->dest.dest.reg.reg;
|
|
struct nir_phi_builder_value *value = state->values[reg->index];
|
|
if (!value)
|
|
return;
|
|
|
|
unsigned write_mask = alu->dest.write_mask;
|
|
if (write_mask == (1 << reg->num_components) - 1) {
|
|
/* This is the simple case where the instruction writes all the
|
|
* components. We can handle that the same as any other destination.
|
|
*/
|
|
rewrite_dest(&alu->dest.dest, state);
|
|
return;
|
|
}
|
|
|
|
/* Calculate the number of components the final instruction, which for
|
|
* per-component things is the number of output components of the
|
|
* instruction and non-per-component things is the number of enabled
|
|
* channels in the write mask.
|
|
*/
|
|
unsigned num_components;
|
|
uint8_t vec_swizzle[NIR_MAX_VEC_COMPONENTS];
|
|
for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
|
|
vec_swizzle[i] = i;
|
|
|
|
if (nir_op_infos[alu->op].output_size == 0) {
|
|
/* Figure out the swizzle we need on the vecN operation and compute
|
|
* the number of components in the SSA def at the same time.
|
|
*/
|
|
num_components = 0;
|
|
for (unsigned index = 0; index < 4; index++) {
|
|
if (write_mask & (1 << index))
|
|
vec_swizzle[index] = num_components++;
|
|
}
|
|
|
|
/* When we change the output writemask, we need to change
|
|
* the swizzles for per-component inputs too
|
|
*/
|
|
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
|
|
if (nir_op_infos[alu->op].input_sizes[i] != 0)
|
|
continue;
|
|
|
|
/*
|
|
* We keep two indices:
|
|
* 1. The index of the original (non-SSA) component
|
|
* 2. The index of the post-SSA, compacted, component
|
|
*
|
|
* We need to map the swizzle component at index 1 to the swizzle
|
|
* component at index 2. Since index 1 is always larger than
|
|
* index 2, we can do it in a single loop.
|
|
*/
|
|
|
|
unsigned ssa_index = 0;
|
|
for (unsigned index = 0; index < 4; index++) {
|
|
if (!((write_mask >> index) & 1))
|
|
continue;
|
|
|
|
alu->src[i].swizzle[ssa_index++] = alu->src[i].swizzle[index];
|
|
}
|
|
assert(ssa_index == num_components);
|
|
}
|
|
} else {
|
|
num_components = nir_op_infos[alu->op].output_size;
|
|
}
|
|
assert(num_components <= 4);
|
|
|
|
alu->dest.write_mask = (1 << num_components) - 1;
|
|
list_del(&alu->dest.dest.reg.def_link);
|
|
nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components,
|
|
reg->bit_size, NULL);
|
|
|
|
nir_op vecN_op = nir_op_vec(reg->num_components);
|
|
|
|
nir_alu_instr *vec = nir_alu_instr_create(state->shader, vecN_op);
|
|
|
|
nir_ssa_def *old_src =
|
|
nir_phi_builder_value_get_block_def(value, alu->instr.block);
|
|
nir_ssa_def *new_src = &alu->dest.dest.ssa;
|
|
|
|
for (unsigned i = 0; i < reg->num_components; i++) {
|
|
if (write_mask & (1 << i)) {
|
|
vec->src[i].src = nir_src_for_ssa(new_src);
|
|
vec->src[i].swizzle[0] = vec_swizzle[i];
|
|
} else {
|
|
vec->src[i].src = nir_src_for_ssa(old_src);
|
|
vec->src[i].swizzle[0] = i;
|
|
}
|
|
}
|
|
|
|
nir_ssa_dest_init(&vec->instr, &vec->dest.dest, reg->num_components,
|
|
reg->bit_size, NULL);
|
|
nir_instr_insert(nir_after_instr(&alu->instr), &vec->instr);
|
|
|
|
nir_phi_builder_value_set_block_def(value, alu->instr.block,
|
|
&vec->dest.dest.ssa);
|
|
}
|
|
|
|
bool
|
|
nir_lower_regs_to_ssa_impl(nir_function_impl *impl)
|
|
{
|
|
if (exec_list_is_empty(&impl->registers))
|
|
return false;
|
|
|
|
nir_metadata_require(impl, nir_metadata_block_index |
|
|
nir_metadata_dominance);
|
|
nir_index_local_regs(impl);
|
|
|
|
void *dead_ctx = ralloc_context(NULL);
|
|
struct regs_to_ssa_state state;
|
|
state.shader = impl->function->shader;
|
|
state.values = ralloc_array(dead_ctx, struct nir_phi_builder_value *,
|
|
impl->reg_alloc);
|
|
|
|
struct nir_phi_builder *phi_build = nir_phi_builder_create(impl);
|
|
|
|
const unsigned block_set_words = BITSET_WORDS(impl->num_blocks);
|
|
BITSET_WORD *defs = ralloc_array(dead_ctx, BITSET_WORD, block_set_words);
|
|
|
|
nir_foreach_register(reg, &impl->registers) {
|
|
if (reg->num_array_elems != 0) {
|
|
/* This pass only really works on "plain" registers. If it's a
|
|
* packed or array register, just set the value to NULL so that the
|
|
* rewrite portion of the pass will know to ignore it.
|
|
*/
|
|
state.values[reg->index] = NULL;
|
|
continue;
|
|
}
|
|
|
|
memset(defs, 0, block_set_words * sizeof(*defs));
|
|
|
|
nir_foreach_def(dest, reg)
|
|
BITSET_SET(defs, dest->reg.parent_instr->block->index);
|
|
|
|
state.values[reg->index] =
|
|
nir_phi_builder_add_value(phi_build, reg->num_components,
|
|
reg->bit_size, defs);
|
|
}
|
|
|
|
nir_foreach_block(block, impl) {
|
|
nir_foreach_instr(instr, block) {
|
|
switch (instr->type) {
|
|
case nir_instr_type_alu:
|
|
rewrite_alu_instr(nir_instr_as_alu(instr), &state);
|
|
break;
|
|
|
|
case nir_instr_type_phi:
|
|
/* We rewrite sources as a separate pass */
|
|
nir_foreach_dest(instr, rewrite_dest, &state);
|
|
break;
|
|
|
|
default:
|
|
nir_foreach_src(instr, rewrite_src, &state);
|
|
nir_foreach_dest(instr, rewrite_dest, &state);
|
|
}
|
|
}
|
|
|
|
nir_if *following_if = nir_block_get_following_if(block);
|
|
if (following_if)
|
|
rewrite_if_condition(following_if, &state);
|
|
|
|
/* Handle phi sources that source from this block. We have to do this
|
|
* as a separate pass because the phi builder assumes that uses and
|
|
* defs are processed in an order that respects dominance. When we have
|
|
* loops, a phi source may be a back-edge so we have to handle it as if
|
|
* it were one of the last instructions in the predecessor block.
|
|
*/
|
|
nir_foreach_phi_src_leaving_block(block, rewrite_src, &state);
|
|
}
|
|
|
|
nir_phi_builder_finish(phi_build);
|
|
|
|
nir_foreach_register_safe(reg, &impl->registers) {
|
|
if (state.values[reg->index]) {
|
|
assert(list_is_empty(®->uses));
|
|
assert(list_is_empty(®->if_uses));
|
|
assert(list_is_empty(®->defs));
|
|
exec_node_remove(®->node);
|
|
}
|
|
}
|
|
|
|
ralloc_free(dead_ctx);
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_block_index |
|
|
nir_metadata_dominance);
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
nir_lower_regs_to_ssa(nir_shader *shader)
|
|
{
|
|
bool progress = false;
|
|
|
|
nir_foreach_function(function, shader) {
|
|
if (function->impl)
|
|
progress |= nir_lower_regs_to_ssa_impl(function->impl);
|
|
}
|
|
|
|
return progress;
|
|
}
|