/* mesa/src/compiler/nir/nir.c */
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Connor Abbott (cwabbott0@gmail.com)
*
*/
#include "nir.h"
#include "nir_control_flow_private.h"
#include <assert.h>
nir_shader *
nir_shader_create(void *mem_ctx,
gl_shader_stage stage,
const nir_shader_compiler_options *options,
shader_info *si)
{
nir_shader *shader = rzalloc(mem_ctx, nir_shader);
exec_list_make_empty(&shader->uniforms);
exec_list_make_empty(&shader->inputs);
exec_list_make_empty(&shader->outputs);
exec_list_make_empty(&shader->shared);
shader->options = options;
if (si)
shader->info = *si;
exec_list_make_empty(&shader->functions);
exec_list_make_empty(&shader->registers);
exec_list_make_empty(&shader->globals);
exec_list_make_empty(&shader->system_values);
shader->reg_alloc = 0;
shader->num_inputs = 0;
shader->num_outputs = 0;
shader->num_uniforms = 0;
shader->num_shared = 0;
shader->stage = stage;
return shader;
}
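/* A minimal usage sketch (illustrative only, not part of the original
 * file): creating a fresh fragment shader with no ralloc parent and
 * default shader_info.  Passing NULL for mem_ctx makes the shader its own
 * ralloc context; passing NULL for si leaves shader->info zeroed by
 * rzalloc.
 */
static nir_shader *
example_create_fs(const nir_shader_compiler_options *options)
{
   return nir_shader_create(NULL, MESA_SHADER_FRAGMENT, options, NULL);
}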
static nir_register *
reg_create(void *mem_ctx, struct exec_list *list)
{
nir_register *reg = ralloc(mem_ctx, nir_register);
list_inithead(&reg->uses);
list_inithead(&reg->defs);
list_inithead(&reg->if_uses);
reg->num_components = 0;
reg->bit_size = 32;
reg->num_array_elems = 0;
reg->is_packed = false;
reg->name = NULL;
exec_list_push_tail(list, &reg->node);
return reg;
}
nir_register *
nir_global_reg_create(nir_shader *shader)
{
nir_register *reg = reg_create(shader, &shader->registers);
reg->index = shader->reg_alloc++;
reg->is_global = true;
return reg;
}
nir_register *
nir_local_reg_create(nir_function_impl *impl)
{
nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
reg->index = impl->reg_alloc++;
reg->is_global = false;
return reg;
}
void
nir_reg_remove(nir_register *reg)
{
exec_node_remove(&reg->node);
}
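/* Illustrative sketch: allocating a scratch vec4 register local to an
 * impl and removing it again.  The helper name is hypothetical.
 */
static void
example_scratch_reg(nir_function_impl *impl)
{
   nir_register *tmp = nir_local_reg_create(impl);
   tmp->num_components = 4;   /* vec4; bit_size stays at the default of 32 */
   /* ... emit instructions that def/use tmp ... */
   nir_reg_remove(tmp);       /* unlinks tmp from impl->registers */
}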
void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
{
switch (var->data.mode) {
case nir_var_all:
assert(!"invalid mode");
break;
case nir_var_local:
assert(!"nir_shader_add_variable cannot be used for local variables");
break;
case nir_var_param:
assert(!"nir_shader_add_variable cannot be used for function parameters");
break;
case nir_var_global:
exec_list_push_tail(&shader->globals, &var->node);
break;
case nir_var_shader_in:
exec_list_push_tail(&shader->inputs, &var->node);
break;
case nir_var_shader_out:
exec_list_push_tail(&shader->outputs, &var->node);
break;
case nir_var_uniform:
case nir_var_shader_storage:
exec_list_push_tail(&shader->uniforms, &var->node);
break;
case nir_var_shared:
assert(shader->stage == MESA_SHADER_COMPUTE);
exec_list_push_tail(&shader->shared, &var->node);
break;
case nir_var_system_value:
exec_list_push_tail(&shader->system_values, &var->node);
break;
}
}
nir_variable *
nir_variable_create(nir_shader *shader, nir_variable_mode mode,
const struct glsl_type *type, const char *name)
{
nir_variable *var = rzalloc(shader, nir_variable);
var->name = ralloc_strdup(var, name);
var->type = type;
var->data.mode = mode;
if ((mode == nir_var_shader_in && shader->stage != MESA_SHADER_VERTEX) ||
(mode == nir_var_shader_out && shader->stage != MESA_SHADER_FRAGMENT))
var->data.interpolation = INTERP_MODE_SMOOTH;
if (mode == nir_var_shader_in || mode == nir_var_uniform)
var->data.read_only = true;
nir_shader_add_variable(shader, var);
return var;
}
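/* Illustrative sketch: declaring a vec4 uniform.  glsl_vec4_type() is
 * assumed to be available from glsl_types.h; nir_variable_create files
 * the variable into shader->uniforms and marks it read-only, as shown
 * above.
 */
static nir_variable *
example_declare_uniform(nir_shader *shader)
{
   return nir_variable_create(shader, nir_var_uniform,
                              glsl_vec4_type(), "u_color");
}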
nir_variable *
nir_local_variable_create(nir_function_impl *impl,
const struct glsl_type *type, const char *name)
{
nir_variable *var = rzalloc(impl->function->shader, nir_variable);
var->name = ralloc_strdup(var, name);
var->type = type;
var->data.mode = nir_var_local;
nir_function_impl_add_variable(impl, var);
return var;
}
nir_function *
nir_function_create(nir_shader *shader, const char *name)
{
nir_function *func = ralloc(shader, nir_function);
exec_list_push_tail(&shader->functions, &func->node);
func->name = ralloc_strdup(func, name);
func->shader = shader;
func->num_params = 0;
func->params = NULL;
func->return_type = glsl_void_type();
func->impl = NULL;
return func;
}
void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
{
dest->is_ssa = src->is_ssa;
if (src->is_ssa) {
dest->ssa = src->ssa;
} else {
dest->reg.base_offset = src->reg.base_offset;
dest->reg.reg = src->reg.reg;
if (src->reg.indirect) {
dest->reg.indirect = ralloc(mem_ctx, nir_src);
nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
} else {
dest->reg.indirect = NULL;
}
}
}
void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
{
/* Copying an SSA definition makes no sense whatsoever. */
assert(!src->is_ssa);
dest->is_ssa = false;
dest->reg.base_offset = src->reg.base_offset;
dest->reg.reg = src->reg.reg;
if (src->reg.indirect) {
dest->reg.indirect = ralloc(instr, nir_src);
nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
} else {
dest->reg.indirect = NULL;
}
}
void
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
nir_alu_instr *instr)
{
nir_src_copy(&dest->src, &src->src, &instr->instr);
dest->abs = src->abs;
dest->negate = src->negate;
for (unsigned i = 0; i < 4; i++)
dest->swizzle[i] = src->swizzle[i];
}
void
nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
nir_alu_instr *instr)
{
nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
dest->write_mask = src->write_mask;
dest->saturate = src->saturate;
}
static void
cf_init(nir_cf_node *node, nir_cf_node_type type)
{
exec_node_init(&node->node);
node->parent = NULL;
node->type = type;
}
nir_function_impl *
nir_function_impl_create_bare(nir_shader *shader)
{
nir_function_impl *impl = ralloc(shader, nir_function_impl);
impl->function = NULL;
cf_init(&impl->cf_node, nir_cf_node_function);
exec_list_make_empty(&impl->body);
exec_list_make_empty(&impl->registers);
exec_list_make_empty(&impl->locals);
impl->num_params = 0;
impl->params = NULL;
impl->return_var = NULL;
impl->reg_alloc = 0;
impl->ssa_alloc = 0;
impl->valid_metadata = nir_metadata_none;
/* create start & end blocks */
nir_block *start_block = nir_block_create(shader);
nir_block *end_block = nir_block_create(shader);
start_block->cf_node.parent = &impl->cf_node;
end_block->cf_node.parent = &impl->cf_node;
impl->end_block = end_block;
exec_list_push_tail(&impl->body, &start_block->cf_node.node);
start_block->successors[0] = end_block;
_mesa_set_add(end_block->predecessors, start_block);
return impl;
}
nir_function_impl *
nir_function_impl_create(nir_function *function)
{
assert(function->impl == NULL);
nir_function_impl *impl = nir_function_impl_create_bare(function->shader);
function->impl = impl;
impl->function = function;
impl->num_params = function->num_params;
impl->params = ralloc_array(function->shader,
nir_variable *, impl->num_params);
for (unsigned i = 0; i < impl->num_params; i++) {
impl->params[i] = rzalloc(function->shader, nir_variable);
impl->params[i]->type = function->params[i].type;
impl->params[i]->data.mode = nir_var_param;
impl->params[i]->data.location = i;
}
if (!glsl_type_is_void(function->return_type)) {
impl->return_var = rzalloc(function->shader, nir_variable);
impl->return_var->type = function->return_type;
impl->return_var->data.mode = nir_var_param;
impl->return_var->data.location = -1;
} else {
impl->return_var = NULL;
}
return impl;
}
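/* Illustrative sketch: the usual two-step creation of an entry point --
 * first the nir_function, then its implementation, which arrives with the
 * start and end blocks set up by nir_function_impl_create_bare().
 */
static nir_function_impl *
example_add_main(nir_shader *shader)
{
   nir_function *func = nir_function_create(shader, "main");
   return nir_function_impl_create(func);
}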
nir_block *
nir_block_create(nir_shader *shader)
{
nir_block *block = rzalloc(shader, nir_block);
cf_init(&block->cf_node, nir_cf_node_block);
block->successors[0] = block->successors[1] = NULL;
block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
_mesa_key_pointer_equal);
block->imm_dom = NULL;
/* XXX maybe it would be worth it to defer allocation? This
* way it doesn't get allocated for shader refs that never run
* nir_calc_dominance? For example, state-tracker creates an
* initial IR, clones that, runs appropriate lowering pass, passes
* to driver which does common lowering/opt, and then stores ref
* which is later used to do state-specific lowering and further
* opt. Do any of the references not need dominance metadata?
*/
block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
_mesa_key_pointer_equal);
exec_list_make_empty(&block->instr_list);
return block;
}
static inline void
src_init(nir_src *src)
{
src->is_ssa = false;
src->reg.reg = NULL;
src->reg.indirect = NULL;
src->reg.base_offset = 0;
}
nir_if *
nir_if_create(nir_shader *shader)
{
nir_if *if_stmt = ralloc(shader, nir_if);
cf_init(&if_stmt->cf_node, nir_cf_node_if);
src_init(&if_stmt->condition);
nir_block *then = nir_block_create(shader);
exec_list_make_empty(&if_stmt->then_list);
exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
then->cf_node.parent = &if_stmt->cf_node;
nir_block *else_stmt = nir_block_create(shader);
exec_list_make_empty(&if_stmt->else_list);
exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
else_stmt->cf_node.parent = &if_stmt->cf_node;
return if_stmt;
}
nir_loop *
nir_loop_create(nir_shader *shader)
{
nir_loop *loop = rzalloc(shader, nir_loop);
cf_init(&loop->cf_node, nir_cf_node_loop);
nir_block *body = nir_block_create(shader);
exec_list_make_empty(&loop->body);
exec_list_push_tail(&loop->body, &body->cf_node.node);
body->cf_node.parent = &loop->cf_node;
body->successors[0] = body;
_mesa_set_add(body->predecessors, body);
return loop;
}
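/* Illustrative sketch: the raw constructors above only build detached
 * control-flow nodes -- a fresh nir_if has one empty block in each branch
 * and a fresh nir_loop has a single body block that loops back to itself.
 * Splicing them into an impl is normally done with the helpers in
 * nir_control_flow.h (e.g. nir_cf_node_insert), not by hand.
 */
static void
example_make_cf_nodes(nir_shader *shader)
{
   nir_if *if_stmt = nir_if_create(shader);
   nir_loop *loop = nir_loop_create(shader);
   (void) if_stmt;
   (void) loop;
}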
static void
instr_init(nir_instr *instr, nir_instr_type type)
{
instr->type = type;
instr->block = NULL;
exec_node_init(&instr->node);
}
static void
dest_init(nir_dest *dest)
{
dest->is_ssa = false;
dest->reg.reg = NULL;
dest->reg.indirect = NULL;
dest->reg.base_offset = 0;
}
static void
alu_dest_init(nir_alu_dest *dest)
{
dest_init(&dest->dest);
dest->saturate = false;
dest->write_mask = 0xf;
}
static void
alu_src_init(nir_alu_src *src)
{
src_init(&src->src);
src->abs = src->negate = false;
src->swizzle[0] = 0;
src->swizzle[1] = 1;
src->swizzle[2] = 2;
src->swizzle[3] = 3;
}
nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op)
{
unsigned num_srcs = nir_op_infos[op].num_inputs;
/* TODO: don't use rzalloc */
nir_alu_instr *instr =
rzalloc_size(shader,
sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
instr_init(&instr->instr, nir_instr_type_alu);
instr->op = op;
alu_dest_init(&instr->dest);
for (unsigned i = 0; i < num_srcs; i++)
alu_src_init(&instr->src[i]);
return instr;
}
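/* Illustrative sketch: building an fadd with an SSA destination.
 * nir_src_for_ssa() and nir_ssa_dest_init() are assumed from nir.h; the
 * new instruction still has to be placed with nir_instr_insert().
 */
static nir_alu_instr *
example_build_fadd(nir_shader *shader, nir_ssa_def *a, nir_ssa_def *b)
{
   nir_alu_instr *add = nir_alu_instr_create(shader, nir_op_fadd);
   add->src[0].src = nir_src_for_ssa(a);
   add->src[1].src = nir_src_for_ssa(b);
   nir_ssa_dest_init(&add->instr, &add->dest.dest,
                     a->num_components, a->bit_size, NULL);
   return add;
}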
nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
instr_init(&instr->instr, nir_instr_type_jump);
instr->type = type;
return instr;
}
nir_load_const_instr *
nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
unsigned bit_size)
{
nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr);
instr_init(&instr->instr, nir_instr_type_load_const);
nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);
return instr;
}
nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
/* TODO: don't use rzalloc */
nir_intrinsic_instr *instr =
rzalloc_size(shader,
sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
instr_init(&instr->instr, nir_instr_type_intrinsic);
instr->intrinsic = op;
if (nir_intrinsic_infos[op].has_dest)
dest_init(&instr->dest);
for (unsigned i = 0; i < num_srcs; i++)
src_init(&instr->src[i]);
return instr;
}
nir_call_instr *
nir_call_instr_create(nir_shader *shader, nir_function *callee)
{
nir_call_instr *instr = ralloc(shader, nir_call_instr);
instr_init(&instr->instr, nir_instr_type_call);
instr->callee = callee;
instr->num_params = callee->num_params;
instr->params = ralloc_array(instr, nir_deref_var *, instr->num_params);
instr->return_deref = NULL;
return instr;
}
nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
nir_tex_instr *instr = rzalloc(shader, nir_tex_instr);
instr_init(&instr->instr, nir_instr_type_tex);
dest_init(&instr->dest);
instr->num_srcs = num_srcs;
instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
for (unsigned i = 0; i < num_srcs; i++)
src_init(&instr->src[i].src);
instr->texture_index = 0;
instr->texture_array_size = 0;
instr->texture = NULL;
instr->sampler_index = 0;
instr->sampler = NULL;
return instr;
}
void
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
{
assert(src_idx < tex->num_srcs);
/* First rewrite the source to NIR_SRC_INIT */
nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT);
/* Now, move all of the other sources down */
for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
tex->src[i-1].src_type = tex->src[i].src_type;
nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
}
tex->num_srcs--;
}
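/* Illustrative sketch: locating a texture source by type and removing it,
 * e.g. after a projector has been lowered away.  The scan-by-src_type
 * loop mirrors what tex-lowering passes typically do.
 */
static void
example_remove_src_by_type(nir_tex_instr *tex, nir_tex_src_type type)
{
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == type) {
         nir_tex_instr_remove_src(tex, i);
         return;
      }
   }
}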
nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
{
nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
instr_init(&instr->instr, nir_instr_type_phi);
dest_init(&instr->dest);
exec_list_make_empty(&instr->srcs);
return instr;
}
nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
{
nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
instr_init(&instr->instr, nir_instr_type_parallel_copy);
exec_list_make_empty(&instr->entries);
return instr;
}
nir_ssa_undef_instr *
nir_ssa_undef_instr_create(nir_shader *shader,
unsigned num_components,
unsigned bit_size)
{
nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
instr_init(&instr->instr, nir_instr_type_ssa_undef);
nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);
return instr;
}
nir_deref_var *
nir_deref_var_create(void *mem_ctx, nir_variable *var)
{
nir_deref_var *deref = ralloc(mem_ctx, nir_deref_var);
deref->deref.deref_type = nir_deref_type_var;
deref->deref.child = NULL;
deref->deref.type = var->type;
deref->var = var;
return deref;
}
nir_deref_array *
nir_deref_array_create(void *mem_ctx)
{
nir_deref_array *deref = ralloc(mem_ctx, nir_deref_array);
deref->deref.deref_type = nir_deref_type_array;
deref->deref.child = NULL;
deref->deref_array_type = nir_deref_array_type_direct;
src_init(&deref->indirect);
deref->base_offset = 0;
return deref;
}
nir_deref_struct *
nir_deref_struct_create(void *mem_ctx, unsigned field_index)
{
nir_deref_struct *deref = ralloc(mem_ctx, nir_deref_struct);
deref->deref.deref_type = nir_deref_type_struct;
deref->deref.child = NULL;
deref->index = field_index;
return deref;
}
nir_deref_var *
nir_deref_var_clone(const nir_deref_var *deref, void *mem_ctx)
{
if (deref == NULL)
return NULL;
nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var);
ret->deref.type = deref->deref.type;
if (deref->deref.child)
ret->deref.child = nir_deref_clone(deref->deref.child, ret);
return ret;
}
static nir_deref_array *
deref_array_clone(const nir_deref_array *deref, void *mem_ctx)
{
nir_deref_array *ret = nir_deref_array_create(mem_ctx);
ret->base_offset = deref->base_offset;
ret->deref_array_type = deref->deref_array_type;
if (deref->deref_array_type == nir_deref_array_type_indirect) {
nir_src_copy(&ret->indirect, &deref->indirect, mem_ctx);
}
ret->deref.type = deref->deref.type;
if (deref->deref.child)
ret->deref.child = nir_deref_clone(deref->deref.child, ret);
return ret;
}
static nir_deref_struct *
deref_struct_clone(const nir_deref_struct *deref, void *mem_ctx)
{
nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index);
ret->deref.type = deref->deref.type;
if (deref->deref.child)
ret->deref.child = nir_deref_clone(deref->deref.child, ret);
return ret;
}
nir_deref *
nir_deref_clone(const nir_deref *deref, void *mem_ctx)
{
if (deref == NULL)
return NULL;
switch (deref->deref_type) {
case nir_deref_type_var:
return &nir_deref_var_clone(nir_deref_as_var(deref), mem_ctx)->deref;
case nir_deref_type_array:
return &deref_array_clone(nir_deref_as_array(deref), mem_ctx)->deref;
case nir_deref_type_struct:
return &deref_struct_clone(nir_deref_as_struct(deref), mem_ctx)->deref;
default:
unreachable("Invalid dereference type");
}
return NULL;
}
/* This is the second step in the recursion. We've found the tail and made a
* copy. Now we need to iterate over all possible leaves and call the
* callback on each one.
*/
static bool
deref_foreach_leaf_build_recur(nir_deref_var *deref, nir_deref *tail,
nir_deref_foreach_leaf_cb cb, void *state)
{
unsigned length;
union {
nir_deref_array arr;
nir_deref_struct str;
} tmp;
assert(tail->child == NULL);
switch (glsl_get_base_type(tail->type)) {
case GLSL_TYPE_UINT:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT:
case GLSL_TYPE_INT64:
case GLSL_TYPE_FLOAT:
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_BOOL:
if (glsl_type_is_vector_or_scalar(tail->type))
return cb(deref, state);
/* Fall Through */
case GLSL_TYPE_ARRAY:
tmp.arr.deref.deref_type = nir_deref_type_array;
tmp.arr.deref.type = glsl_get_array_element(tail->type);
tmp.arr.deref_array_type = nir_deref_array_type_direct;
tmp.arr.indirect = NIR_SRC_INIT;
tail->child = &tmp.arr.deref;
length = glsl_get_length(tail->type);
for (unsigned i = 0; i < length; i++) {
tmp.arr.deref.child = NULL;
tmp.arr.base_offset = i;
if (!deref_foreach_leaf_build_recur(deref, &tmp.arr.deref, cb, state))
return false;
}
return true;
case GLSL_TYPE_STRUCT:
tmp.str.deref.deref_type = nir_deref_type_struct;
tail->child = &tmp.str.deref;
length = glsl_get_length(tail->type);
for (unsigned i = 0; i < length; i++) {
tmp.arr.deref.child = NULL;
tmp.str.deref.type = glsl_get_struct_field(tail->type, i);
tmp.str.index = i;
if (!deref_foreach_leaf_build_recur(deref, &tmp.arr.deref, cb, state))
return false;
}
return true;
default:
unreachable("Invalid type for dereference");
}
}
/* This is the first step of the foreach_leaf recursion. In this step we are
* walking to the end of the deref chain and making a copy in the stack as we
* go. This is because we don't want to mutate the deref chain that was
* passed in by the caller. The downside is that this deref chain is on the
* stack and, if the caller wants to do anything with it, they will have to
* make their own copy because this one will go away.
*/
static bool
deref_foreach_leaf_copy_recur(nir_deref_var *deref, nir_deref *tail,
nir_deref_foreach_leaf_cb cb, void *state)
{
union {
nir_deref_array arr;
nir_deref_struct str;
} c;
if (tail->child) {
switch (tail->child->deref_type) {
case nir_deref_type_array:
c.arr = *nir_deref_as_array(tail->child);
tail->child = &c.arr.deref;
return deref_foreach_leaf_copy_recur(deref, &c.arr.deref, cb, state);
case nir_deref_type_struct:
c.str = *nir_deref_as_struct(tail->child);
tail->child = &c.str.deref;
return deref_foreach_leaf_copy_recur(deref, &c.str.deref, cb, state);
case nir_deref_type_var:
default:
unreachable("Invalid deref type for a child");
}
} else {
/* We've gotten to the end of the original deref. Time to start
* building our own derefs.
*/
return deref_foreach_leaf_build_recur(deref, tail, cb, state);
}
}
/**
* This function iterates over all of the possible derefs that can be created
* with the given deref as the head. It then calls the provided callback with
* a full deref for each one.
*
* The deref passed to the callback will be allocated on the stack. You will
* need to make a copy if you want it to hang around.
*/
bool
nir_deref_foreach_leaf(nir_deref_var *deref,
nir_deref_foreach_leaf_cb cb, void *state)
{
nir_deref_var copy = *deref;
return deref_foreach_leaf_copy_recur(&copy, &copy.deref, cb, state);
}
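/* Illustrative sketch: a leaf callback that just counts the vector/scalar
 * leaves a variable expands to.  As documented above, the deref handed to
 * the callback is stack-allocated, so it is only read here, never stored.
 */
static bool
count_leaf_cb(nir_deref_var *deref, void *state)
{
   unsigned *count = state;
   (void) deref;
   (*count)++;
   return true;  /* returning false would stop the walk early */
}

static unsigned
example_count_leaves(nir_deref_var *deref)
{
   unsigned count = 0;
   nir_deref_foreach_leaf(deref, count_leaf_cb, &count);
   return count;
}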
/* Returns a load_const instruction that represents the constant
* initializer for the given deref chain. The caller is responsible for
* ensuring that there actually is a constant initializer.
*/
nir_load_const_instr *
nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref)
{
nir_constant *constant = deref->var->constant_initializer;
assert(constant);
const nir_deref *tail = &deref->deref;
unsigned matrix_col = 0;
while (tail->child) {
switch (tail->child->deref_type) {
case nir_deref_type_array: {
nir_deref_array *arr = nir_deref_as_array(tail->child);
assert(arr->deref_array_type == nir_deref_array_type_direct);
if (glsl_type_is_matrix(tail->type)) {
assert(arr->deref.child == NULL);
matrix_col = arr->base_offset;
} else {
constant = constant->elements[arr->base_offset];
}
break;
}
case nir_deref_type_struct: {
constant = constant->elements[nir_deref_as_struct(tail->child)->index];
break;
}
default:
unreachable("Invalid deref child type");
}
tail = tail->child;
}
unsigned bit_size = glsl_get_bit_size(tail->type);
nir_load_const_instr *load =
nir_load_const_instr_create(shader, glsl_get_vector_elements(tail->type),
bit_size);
switch (glsl_get_base_type(tail->type)) {
case GLSL_TYPE_FLOAT:
case GLSL_TYPE_INT:
case GLSL_TYPE_UINT:
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
case GLSL_TYPE_BOOL:
load->value = constant->values[matrix_col];
break;
default:
unreachable("Invalid immediate type");
}
return load;
}
nir_function_impl *
nir_cf_node_get_function(nir_cf_node *node)
{
while (node->type != nir_cf_node_function) {
node = node->parent;
}
return nir_cf_node_as_function(node);
}
/* Reduces a cursor by trying to convert it to an "after" position and to
* go up to block granularity when possible.
*/
static nir_cursor
reduce_cursor(nir_cursor cursor)
{
switch (cursor.option) {
case nir_cursor_before_block:
assert(nir_cf_node_prev(&cursor.block->cf_node) == NULL ||
nir_cf_node_prev(&cursor.block->cf_node)->type != nir_cf_node_block);
if (exec_list_is_empty(&cursor.block->instr_list)) {
/* Empty block. After is as good as before. */
cursor.option = nir_cursor_after_block;
}
return cursor;
case nir_cursor_after_block:
return cursor;
case nir_cursor_before_instr: {
nir_instr *prev_instr = nir_instr_prev(cursor.instr);
if (prev_instr) {
/* Before this instruction is after the previous */
cursor.instr = prev_instr;
cursor.option = nir_cursor_after_instr;
} else {
/* No previous instruction. Switch to before block */
cursor.block = cursor.instr->block;
cursor.option = nir_cursor_before_block;
}
return reduce_cursor(cursor);
}
case nir_cursor_after_instr:
if (nir_instr_next(cursor.instr) == NULL) {
/* This is the last instruction, switch to after block */
cursor.option = nir_cursor_after_block;
cursor.block = cursor.instr->block;
}
return cursor;
default:
unreachable("Inavlid cursor option");
}
}
bool
nir_cursors_equal(nir_cursor a, nir_cursor b)
{
/* Reduced cursors should be unique */
a = reduce_cursor(a);
b = reduce_cursor(b);
return a.block == b.block && a.option == b.option;
}
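/* Illustrative sketch: two differently-phrased cursors that denote the
 * same insertion point compare equal after reduction.  Assumes `last` is
 * the final instruction of `block`.
 */
static bool
example_same_point(nir_block *block, nir_instr *last)
{
   nir_cursor a, b;
   a.option = nir_cursor_after_block;
   a.block = block;
   b.option = nir_cursor_after_instr;
   b.instr = last;
   return nir_cursors_equal(a, b);  /* true when last really ends block */
}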
static bool
add_use_cb(nir_src *src, void *state)
{
nir_instr *instr = state;
src->parent_instr = instr;
list_addtail(&src->use_link,
src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses);
return true;
}
static bool
add_ssa_def_cb(nir_ssa_def *def, void *state)
{
nir_instr *instr = state;
if (instr->block && def->index == UINT_MAX) {
nir_function_impl *impl =
nir_cf_node_get_function(&instr->block->cf_node);
def->index = impl->ssa_alloc++;
}
return true;
}
static bool
add_reg_def_cb(nir_dest *dest, void *state)
{
nir_instr *instr = state;
if (!dest->is_ssa) {
dest->reg.parent_instr = instr;
list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
}
return true;
}
static void
add_defs_uses(nir_instr *instr)
{
nir_foreach_src(instr, add_use_cb, instr);
nir_foreach_dest(instr, add_reg_def_cb, instr);
nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
}
void
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
{
switch (cursor.option) {
case nir_cursor_before_block:
/* Only allow inserting jumps into empty blocks. */
if (instr->type == nir_instr_type_jump)
assert(exec_list_is_empty(&cursor.block->instr_list));
instr->block = cursor.block;
add_defs_uses(instr);
exec_list_push_head(&cursor.block->instr_list, &instr->node);
break;
case nir_cursor_after_block: {
/* Inserting instructions after a jump is illegal. */
nir_instr *last = nir_block_last_instr(cursor.block);
assert(last == NULL || last->type != nir_instr_type_jump);
(void) last;
instr->block = cursor.block;
add_defs_uses(instr);
exec_list_push_tail(&cursor.block->instr_list, &instr->node);
break;
}
case nir_cursor_before_instr:
assert(instr->type != nir_instr_type_jump);
instr->block = cursor.instr->block;
add_defs_uses(instr);
exec_node_insert_node_before(&cursor.instr->node, &instr->node);
break;
case nir_cursor_after_instr:
/* Inserting instructions after a jump is illegal. */
assert(cursor.instr->type != nir_instr_type_jump);
/* Only allow inserting jumps at the end of the block. */
if (instr->type == nir_instr_type_jump)
assert(cursor.instr == nir_block_last_instr(cursor.instr->block));
instr->block = cursor.instr->block;
add_defs_uses(instr);
exec_node_insert_after(&cursor.instr->node, &instr->node);
break;
}
if (instr->type == nir_instr_type_jump)
nir_handle_add_jump(instr->block);
}
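/* Illustrative sketch: materializing a 32-bit scalar zero at the end of a
 * block.  The cursor is built by hand here to match reduce_cursor()
 * above; nir.h's cursor helpers (e.g. nir_after_block()) wrap the same
 * field assignments.
 */
static nir_ssa_def *
example_emit_zero(nir_shader *shader, nir_block *block)
{
   nir_load_const_instr *zero = nir_load_const_instr_create(shader, 1, 32);
   zero->value.u32[0] = 0;

   nir_cursor c;
   c.option = nir_cursor_after_block;
   c.block = block;
   nir_instr_insert(c, &zero->instr);
   return &zero->def;
}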
static bool
src_is_valid(const nir_src *src)
{
return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
}
static bool
remove_use_cb(nir_src *src, void *state)
{
(void) state;
if (src_is_valid(src))
list_del(&src->use_link);
return true;
}
static bool
remove_def_cb(nir_dest *dest, void *state)
{
(void) state;
if (!dest->is_ssa)
list_del(&dest->reg.def_link);
return true;
}
static void
remove_defs_uses(nir_instr *instr)
{
nir_foreach_dest(instr, remove_def_cb, instr);
nir_foreach_src(instr, remove_use_cb, instr);
}
void nir_instr_remove(nir_instr *instr)
{
remove_defs_uses(instr);
exec_node_remove(&instr->node);
if (instr->type == nir_instr_type_jump) {
nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
nir_handle_remove_jump(instr->block, jump_instr->type);
}
}
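/* Illustrative sketch: the classic dead-code step -- drop an instruction
 * once its SSA result has no uses left.  list_empty() is assumed from
 * util/list.h; uses/if_uses are the per-def lists maintained by
 * add_defs_uses() and remove_defs_uses() above.
 */
static void
example_remove_if_dead(nir_instr *instr, nir_ssa_def *def)
{
   if (list_empty(&def->uses) && list_empty(&def->if_uses))
      nir_instr_remove(instr);
}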
/*@}*/
void
nir_index_local_regs(nir_function_impl *impl)
{
unsigned index = 0;
foreach_list_typed(nir_register, reg, node, &impl->registers) {
reg->index = index++;
}
impl->reg_alloc = index;
}
void
nir_index_global_regs(nir_shader *shader)
{
unsigned index = 0;
foreach_list_typed(nir_register, reg, node, &shader->registers) {
reg->index = index++;
}
shader->reg_alloc = index;
}
static bool
visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
{
return cb(&instr->dest.dest, state);
}
static bool
visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
void *state)
{
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
return cb(&instr->dest, state);
return true;
}
static bool
visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb,
void *state)
{
return cb(&instr->dest, state);
}
static bool
visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state)
{
return cb(&instr->dest, state);
}
static bool
visit_parallel_copy_dest(nir_parallel_copy_instr *instr,
nir_foreach_dest_cb cb, void *state)
{
nir_foreach_parallel_copy_entry(entry, instr) {
if (!cb(&entry->dest, state))
return false;
}
return true;
}
bool
nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
{
switch (instr->type) {
case nir_instr_type_alu:
return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
case nir_instr_type_intrinsic:
return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
case nir_instr_type_tex:
return visit_texture_dest(nir_instr_as_tex(instr), cb, state);
case nir_instr_type_phi:
return visit_phi_dest(nir_instr_as_phi(instr), cb, state);
case nir_instr_type_parallel_copy:
return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr),
cb, state);
case nir_instr_type_load_const:
case nir_instr_type_ssa_undef:
case nir_instr_type_call:
case nir_instr_type_jump:
break;
default:
unreachable("Invalid instruction type");
break;
}
return true;
}
struct foreach_ssa_def_state {
nir_foreach_ssa_def_cb cb;
void *client_state;
};
static inline bool
nir_ssa_def_visitor(nir_dest *dest, void *void_state)
{
struct foreach_ssa_def_state *state = void_state;
if (dest->is_ssa)
return state->cb(&dest->ssa, state->client_state);
else
return true;
}
bool
nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
{
switch (instr->type) {
case nir_instr_type_alu:
case nir_instr_type_tex:
case nir_instr_type_intrinsic:
case nir_instr_type_phi:
case nir_instr_type_parallel_copy: {
struct foreach_ssa_def_state foreach_state = {cb, state};
return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
}
case nir_instr_type_load_const:
return cb(&nir_instr_as_load_const(instr)->def, state);
case nir_instr_type_ssa_undef:
return cb(&nir_instr_as_ssa_undef(instr)->def, state);
case nir_instr_type_call:
case nir_instr_type_jump:
return true;
default:
unreachable("Invalid instruction type");
}
}
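/* Illustrative sketch: a nir_foreach_ssa_def_cb that records the largest
 * SSA index an instruction defines -- invoked as
 * nir_foreach_ssa_def(instr, max_ssa_index_cb, &max) -- showing the
 * convention that returning true keeps the walk going.
 */
static bool
max_ssa_index_cb(nir_ssa_def *def, void *state)
{
   unsigned *max = state;
   if (def->index != UINT_MAX && def->index > *max)
      *max = def->index;
   return true;
}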
static bool
visit_src(nir_src *src, nir_foreach_src_cb cb, void *state)
{
if (!cb(src, state))
return false;
if (!src->is_ssa && src->reg.indirect)
return cb(src->reg.indirect, state);
return true;
}
static bool
visit_deref_array_src(nir_deref_array *deref, nir_foreach_src_cb cb,
void *state)
{
if (deref->deref_array_type == nir_deref_array_type_indirect)
return visit_src(&deref->indirect, cb, state);
return true;
}
static bool
visit_deref_src(nir_deref_var *deref, nir_foreach_src_cb cb, void *state)
{
nir_deref *cur = &deref->deref;
while (cur != NULL) {
if (cur->deref_type == nir_deref_type_array) {
if (!visit_deref_array_src(nir_deref_as_array(cur), cb, state))
return false;
}
cur = cur->child;
}
return true;
}
static bool
visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
{
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
if (!visit_src(&instr->src[i].src, cb, state))
return false;
return true;
}
static bool
visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
{
for (unsigned i = 0; i < instr->num_srcs; i++) {
if (!visit_src(&instr->src[i].src, cb, state))
return false;
}
if (instr->texture != NULL) {
if (!visit_deref_src(instr->texture, cb, state))
return false;
}
if (instr->sampler != NULL) {
if (!visit_deref_src(instr->sampler, cb, state))
return false;
}
return true;
}
static bool
visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
void *state)
{
unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
for (unsigned i = 0; i < num_srcs; i++) {
if (!visit_src(&instr->src[i], cb, state))
return false;
}
unsigned num_vars =
nir_intrinsic_infos[instr->intrinsic].num_variables;
for (unsigned i = 0; i < num_vars; i++) {
if (!visit_deref_src(instr->variables[i], cb, state))
return false;
}
return true;
}
static bool
visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state)
{
nir_foreach_phi_src(src, instr) {
if (!visit_src(&src->src, cb, state))
return false;
}
return true;
}
static bool
visit_parallel_copy_src(nir_parallel_copy_instr *instr,
nir_foreach_src_cb cb, void *state)
{
nir_foreach_parallel_copy_entry(entry, instr) {
if (!visit_src(&entry->src, cb, state))
return false;
}
return true;
}
typedef struct {
void *state;
nir_foreach_src_cb cb;
} visit_dest_indirect_state;
static bool
visit_dest_indirect(nir_dest *dest, void *_state)
{
visit_dest_indirect_state *state = (visit_dest_indirect_state *) _state;
if (!dest->is_ssa && dest->reg.indirect)
return state->cb(dest->reg.indirect, state->state);
return true;
}
bool
nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
{
switch (instr->type) {
case nir_instr_type_alu:
if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
return false;
break;
case nir_instr_type_intrinsic:
if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
return false;
break;
case nir_instr_type_tex:
if (!visit_tex_src(nir_instr_as_tex(instr), cb, state))
return false;
break;
case nir_instr_type_call:
/* Call instructions have no regular sources */
break;
case nir_instr_type_load_const:
/* Constant load instructions have no regular sources */
break;
case nir_instr_type_phi:
if (!visit_phi_src(nir_instr_as_phi(instr), cb, state))
return false;
break;
case nir_instr_type_parallel_copy:
if (!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr),
cb, state))
return false;
break;
case nir_instr_type_jump:
case nir_instr_type_ssa_undef:
return true;
default:
unreachable("Invalid instruction type");
break;
}
visit_dest_indirect_state dest_state;
dest_state.state = state;
dest_state.cb = cb;
return nir_foreach_dest(instr, visit_dest_indirect, &dest_state);
}
nir_const_value *
nir_src_as_const_value(nir_src src)
{
if (!src.is_ssa)
return NULL;
if (src.ssa->parent_instr->type != nir_instr_type_load_const)
return NULL;
nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
return &load->value;
}
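/* Illustrative sketch: the usual constant-folding peek -- bail unless the
 * source is a known compile-time constant, then read a lane out of the
 * nir_const_value union.
 */
static bool
example_src_is_zero(nir_src src)
{
   nir_const_value *cv = nir_src_as_const_value(src);
   return cv != NULL && cv->u32[0] == 0;
}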
/**
* Returns true if the source is known to be dynamically uniform. Otherwise it
* returns false, which means the source may or may not be dynamically uniform
* but it cannot be determined.
*/
bool
nir_src_is_dynamically_uniform(nir_src src)
{
if (!src.is_ssa)
return false;
/* Constants are trivially dynamically uniform */
if (src.ssa->parent_instr->type == nir_instr_type_load_const)
return true;
/* As are uniform variables */
if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
if (intr->intrinsic == nir_intrinsic_load_uniform)
return true;
}
/* XXX: this could have many more tests, such as when a sampler function is
* called with dynamically uniform arguments.
*/
return false;
}
static void
src_remove_all_uses(nir_src *src)
{
for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
if (!src_is_valid(src))
continue;
list_del(&src->use_link);
}
}
static void
src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
{
for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
if (!src_is_valid(src))
continue;
if (parent_instr) {
src->parent_instr = parent_instr;
if (src->is_ssa)
list_addtail(&src->use_link, &src->ssa->uses);
else
list_addtail(&src->use_link, &src->reg.reg->uses);
} else {
assert(parent_if);
src->parent_if = parent_if;
if (src->is_ssa)
list_addtail(&src->use_link, &src->ssa->if_uses);
else
list_addtail(&src->use_link, &src->reg.reg->if_uses);
}
}
}
void
nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
{
assert(!src_is_valid(src) || src->parent_instr == instr);
src_remove_all_uses(src);
*src = new_src;
src_add_all_uses(src, instr, NULL);
}
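
/* Illustrative sketch, not part of the original file: the typical way an
 * optimization pass points one ALU source at an existing SSA value.  The
 * rewrite above unlinks the old use and links the new one, so the caller
 * only has to build the replacement nir_src.
 */
static void
example_set_alu_src(nir_alu_instr *alu, unsigned src_idx, nir_ssa_def *val)
{
   nir_instr_rewrite_src(&alu->instr, &alu->src[src_idx].src,
                         nir_src_for_ssa(val));
}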
void
nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
{
assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);
src_remove_all_uses(dest);
src_remove_all_uses(src);
*dest = *src;
*src = NIR_SRC_INIT;
src_add_all_uses(dest, dest_instr, NULL);
}
void
nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
{
nir_src *src = &if_stmt->condition;
assert(!src_is_valid(src) || src->parent_if == if_stmt);
src_remove_all_uses(src);
*src = new_src;
src_add_all_uses(src, NULL, if_stmt);
}
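
/* Illustrative sketch, not part of the original file: swapping an if's
 * condition for a simplified boolean def (new_cond is assumed to be a
 * 1-component boolean value that dominates the if).
 */
static void
example_swap_if_condition(nir_if *if_stmt, nir_ssa_def *new_cond)
{
   nir_if_rewrite_condition(if_stmt, nir_src_for_ssa(new_cond));
}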
void
nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
{
if (dest->is_ssa) {
/* We can only overwrite an SSA destination if it has no uses. */
assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses));
} else {
list_del(&dest->reg.def_link);
if (dest->reg.indirect)
src_remove_all_uses(dest->reg.indirect);
}
   /* We can't rewrite with an SSA def */
assert(!new_dest.is_ssa);
nir_dest_copy(dest, &new_dest, instr);
dest->reg.parent_instr = instr;
list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);
if (dest->reg.indirect)
src_add_all_uses(dest->reg.indirect, instr, NULL);
}
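
/* Illustrative sketch, not part of the original file: retargeting an
 * instruction's destination at a register, as an out-of-SSA style pass
 * might (reg is assumed to be sized to match the old destination).
 */
static void
example_retarget_dest(nir_instr *instr, nir_dest *dest, nir_register *reg)
{
   nir_instr_rewrite_dest(instr, dest, nir_dest_for_reg(reg));
}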
void
nir_instr_rewrite_deref(nir_instr *instr, nir_deref_var **deref,
nir_deref_var *new_deref)
{
if (*deref)
visit_deref_src(*deref, remove_use_cb, NULL);
*deref = new_deref;
if (*deref)
visit_deref_src(*deref, add_use_cb, instr);
}
/* note: does *not* take ownership of 'name' */
void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
unsigned num_components,
unsigned bit_size, const char *name)
{
def->name = ralloc_strdup(instr, name);
def->parent_instr = instr;
list_inithead(&def->uses);
list_inithead(&def->if_uses);
def->num_components = num_components;
def->bit_size = bit_size;
if (instr->block) {
nir_function_impl *impl =
nir_cf_node_get_function(&instr->block->cf_node);
def->index = impl->ssa_alloc++;
} else {
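      /* The instruction has not been inserted into a block yet, so there is
       * no impl to allocate an index from; use an obviously invalid sentinel
       * until the defs are re-indexed (e.g. by nir_index_ssa_defs()).
       */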
def->index = UINT_MAX;
}
}
/* note: does *not* take ownership of 'name' */
void
nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
unsigned num_components, unsigned bit_size,
const char *name)
{
dest->is_ssa = true;
nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size, name);
}
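
/* Illustrative sketch, not part of the original file: a typical construction
 * sequence for a new ALU instruction.  Sources set before insertion are
 * linked into their use lists when the instruction is added to a block, and
 * passing NULL for the name is allowed since it is strdup'd only if present.
 */
static nir_alu_instr *
example_build_fmov(nir_shader *shader, nir_ssa_def *val)
{
   nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_fmov);
   mov->src[0].src = nir_src_for_ssa(val);
   nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
                     val->num_components, val->bit_size, NULL);
   return mov;
}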
void
nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
{
assert(!new_src.is_ssa || def != new_src.ssa);
nir_foreach_use_safe(use_src, def)
nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
nir_foreach_if_use_safe(use_src, def)
nir_if_rewrite_condition(use_src->parent_if, new_src);
}
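
/* Illustrative sketch, not part of the original file: the classic
 * replace-and-remove pattern.  Once every use has been rewritten, the old
 * instruction is dead and can be removed immediately.
 */
static void
example_replace_instr(nir_alu_instr *alu, nir_ssa_def *replacement)
{
   assert(alu->dest.dest.is_ssa);
   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
                            nir_src_for_ssa(replacement));
   nir_instr_remove(&alu->instr);
}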
static bool
is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
{
assert(start->block == end->block);
if (between->block != start->block)
return false;
/* Search backwards looking for "between" */
while (start != end) {
if (between == end)
return true;
end = nir_instr_prev(end);
assert(end);
}
return false;
}
/* Replaces all uses of the given SSA def with the given source, but only if
 * the use comes after the after_me instruction.  This can be useful if you
 * are emitting code to fix up the result of some instruction: you can freely
 * use the result in that code, then call rewrite_uses_after with the last
 * fixup instruction as after_me, and it will replace all of the uses you
 * want without touching the fixup code.
*
* This function assumes that after_me is in the same block as
* def->parent_instr and that after_me comes after def->parent_instr.
*/
void
nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
nir_instr *after_me)
{
assert(!new_src.is_ssa || def != new_src.ssa);
nir_foreach_use_safe(use_src, def) {
assert(use_src->parent_instr != def->parent_instr);
/* Since def already dominates all of its uses, the only way a use can
* not be dominated by after_me is if it is between def and after_me in
* the instruction list.
*/
if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
}
nir_foreach_if_use_safe(use_src, def)
nir_if_rewrite_condition(use_src->parent_if, new_src);
}
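
/* Illustrative sketch, not part of the original file, assuming nir_builder.h
 * is available: the fixup pattern described in the comment above.  The fsat
 * emitted right after the instruction may freely read def;
 * rewrite_uses_after then redirects only the uses that come after it.
 */
static void
example_saturate_result(nir_builder *b, nir_ssa_def *def)
{
   b->cursor = nir_after_instr(def->parent_instr);
   nir_ssa_def *sat = nir_fsat(b, def);
   nir_ssa_def_rewrite_uses_after(def, nir_src_for_ssa(sat),
                                  sat->parent_instr);
}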
uint8_t
nir_ssa_def_components_read(nir_ssa_def *def)
{
uint8_t read_mask = 0;
nir_foreach_use(use, def) {
if (use->parent_instr->type == nir_instr_type_alu) {
nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
nir_alu_src *alu_src = exec_node_data(nir_alu_src, use, src);
int src_idx = alu_src - &alu->src[0];
assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
for (unsigned c = 0; c < 4; c++) {
if (!nir_alu_instr_channel_used(alu, src_idx, c))
continue;
read_mask |= (1 << alu_src->swizzle[c]);
}
} else {
return (1 << def->num_components) - 1;
}
}
return read_mask;
}
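
/* Illustrative sketch, not part of the original file: a pass can consult the
 * read mask to skip channels nobody consumes, e.g. when deciding whether a
 * vector def can be narrowed.
 */
static bool
example_only_xy_read(nir_ssa_def *def)
{
   return (nir_ssa_def_components_read(def) & ~0x3) == 0;
}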
nir_block *
nir_block_cf_tree_next(nir_block *block)
{
if (block == NULL) {
/* nir_foreach_block_safe() will call this function on a NULL block
 * after the last iteration, but it won't use the result, so just return
 * NULL here.
*/
return NULL;
}
nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
if (cf_next)
return nir_cf_node_cf_tree_first(cf_next);
nir_cf_node *parent = block->cf_node.parent;
switch (parent->type) {
case nir_cf_node_if: {
/* Are we at the end of the if? Go to the beginning of the else */
nir_if *if_stmt = nir_cf_node_as_if(parent);
if (block == nir_if_last_then_block(if_stmt))
return nir_if_first_else_block(if_stmt);
assert(block == nir_if_last_else_block(if_stmt));
/* fall through */
}
case nir_cf_node_loop:
return nir_cf_node_as_block(nir_cf_node_next(parent));
case nir_cf_node_function:
return NULL;
default:
unreachable("unknown cf node type");
}
}
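
/* Illustrative sketch, not part of the original file: with the stateless
 * next/prev helpers, iterating every block of an impl in source order is a
 * plain for loop; this is essentially what nir_foreach_block() expands to.
 */
static unsigned
example_count_blocks(nir_function_impl *impl)
{
   unsigned count = 0;
   for (nir_block *block = nir_start_block(impl); block != NULL;
        block = nir_block_cf_tree_next(block))
      count++;
   return count;
}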
nir_block *
nir_block_cf_tree_prev(nir_block *block)
{
if (block == NULL) {
/* do this for consistency with nir_block_cf_tree_next() */
return NULL;
}
nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
if (cf_prev)
return nir_cf_node_cf_tree_last(cf_prev);
nir_cf_node *parent = block->cf_node.parent;
switch (parent->type) {
case nir_cf_node_if: {
/* Are we at the beginning of the else? Go to the end of the if */
nir_if *if_stmt = nir_cf_node_as_if(parent);
if (block == nir_if_first_else_block(if_stmt))
return nir_if_last_then_block(if_stmt);
assert(block == nir_if_first_then_block(if_stmt));
/* fall through */
}
case nir_cf_node_loop:
return nir_cf_node_as_block(nir_cf_node_prev(parent));
case nir_cf_node_function:
return NULL;
default:
unreachable("unknown cf node type");
}
}
nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node)
{
switch (node->type) {
case nir_cf_node_function: {
nir_function_impl *impl = nir_cf_node_as_function(node);
return nir_start_block(impl);
}
case nir_cf_node_if: {
nir_if *if_stmt = nir_cf_node_as_if(node);
return nir_if_first_then_block(if_stmt);
}
case nir_cf_node_loop: {
nir_loop *loop = nir_cf_node_as_loop(node);
return nir_loop_first_block(loop);
}
case nir_cf_node_block: {
return nir_cf_node_as_block(node);
}
default:
unreachable("unknown node type");
}
}
nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node)
{
switch (node->type) {
case nir_cf_node_function: {
nir_function_impl *impl = nir_cf_node_as_function(node);
return nir_impl_last_block(impl);
}
case nir_cf_node_if: {
nir_if *if_stmt = nir_cf_node_as_if(node);
return nir_if_last_else_block(if_stmt);
}
case nir_cf_node_loop: {
nir_loop *loop = nir_cf_node_as_loop(node);
return nir_loop_last_block(loop);
}
case nir_cf_node_block: {
return nir_cf_node_as_block(node);
}
default:
unreachable("unknown node type");
}
}
nir_block *
nir_cf_node_cf_tree_next(nir_cf_node *node)
{
if (node->type == nir_cf_node_block)
return nir_block_cf_tree_next(nir_cf_node_as_block(node));
else if (node->type == nir_cf_node_function)
return NULL;
else
return nir_cf_node_as_block(nir_cf_node_next(node));
}
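/* With the tree-walk helpers above, visiting every block in source order is
 * just a for loop; no recursion and no callback state are needed.  A minimal
 * sketch of such a macro, assuming nir_start_block() from nir.h
 * ("example_foreach_block" is a hypothetical name, not the real
 * nir_foreach_block() macro in nir.h):
 */
#define example_foreach_block(block, impl)                        \
   for (nir_block *block = nir_start_block(impl); block != NULL; \
        block = nir_block_cf_tree_next(block))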
nir_if *
nir_block_get_following_if(nir_block *block)
{
if (exec_node_is_tail_sentinel(&block->cf_node.node))
return NULL;
if (nir_cf_node_is_last(&block->cf_node))
return NULL;
nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
if (next_node->type != nir_cf_node_if)
return NULL;
return nir_cf_node_as_if(next_node);
}
nir_loop *
nir_block_get_following_loop(nir_block *block)
{
if (exec_node_is_tail_sentinel(&block->cf_node.node))
return NULL;
if (nir_cf_node_is_last(&block->cf_node))
return NULL;
nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
if (next_node->type != nir_cf_node_loop)
return NULL;
return nir_cf_node_as_loop(next_node);
}
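/* Both helpers above follow the same pattern: peek at the CF node after a
 * block and hand it back only if it is the construct the caller wants.  A
 * hedged usage sketch ("example_block_ends_in_if" is a hypothetical helper,
 * not part of this file):
 */
static bool
example_block_ends_in_if(nir_block *block)
{
   nir_if *following_if = nir_block_get_following_if(block);
   if (following_if == NULL)
      return false;

   /* following_if->condition etc. are available for further checks here. */
   return true;
}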
void
nir_index_blocks(nir_function_impl *impl)
{
unsigned index = 0;
if (impl->valid_metadata & nir_metadata_block_index)
return;
nir_foreach_block(block, impl) {
block->index = index++;
}
impl->num_blocks = index;
}
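/* Passes normally do not call this directly; they request the metadata and
 * let the tracker decide whether re-indexing is needed, which is what the
 * early return above supports.  A minimal sketch ("example_pass_prologue"
 * is hypothetical):
 */
static void
example_pass_prologue(nir_function_impl *impl)
{
   nir_metadata_require(impl, nir_metadata_block_index);
   /* block->index and impl->num_blocks are now valid. */
}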
static bool
index_ssa_def_cb(nir_ssa_def *def, void *state)
{
unsigned *index = (unsigned *) state;
def->index = (*index)++;
return true;
}
/**
* The indices are applied top-to-bottom which has the very nice property
* that, if A dominates B, then A->index <= B->index.
*/
void
nir_index_ssa_defs(nir_function_impl *impl)
{
unsigned index = 0;
nir_foreach_block(block, impl) {
nir_foreach_instr(instr, block)
nir_foreach_ssa_def(instr, index_ssa_def_cb, &index);
}
impl->ssa_alloc = index;
}
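/* The ordering property documented above yields a cheap necessary condition
 * for dominance: if a->index > b->index, then a cannot dominate b.  A hedged
 * early-out sketch ("example_may_dominate" is a hypothetical helper; it
 * assumes nir_index_ssa_defs() has run on the enclosing impl):
 */
static bool
example_may_dominate(const nir_ssa_def *a, const nir_ssa_def *b)
{
   /* A false result proves non-dominance; a true result still needs a real
    * dominance query.
    */
   return a->index <= b->index;
}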
/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if instruction A dominates instruction B, then A->index <= B->index.
 */
unsigned
nir_index_instrs(nir_function_impl *impl)
{
unsigned index = 0;
nir_foreach_block(block, impl) {
nir_foreach_instr(instr, block)
instr->index = index++;
}
return index;
}
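/* Instruction indices give cheap "comes before" queries within an impl.  A
 * hedged sketch ("example_instr_precedes" is a hypothetical helper; the
 * result is valid only until instructions are added, removed, or reordered):
 */
static bool
example_instr_precedes(const nir_instr *a, const nir_instr *b)
{
   return a->index < b->index;
}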
nir_intrinsic_op
nir_intrinsic_from_system_value(gl_system_value val)
{
switch (val) {
case SYSTEM_VALUE_VERTEX_ID:
return nir_intrinsic_load_vertex_id;
case SYSTEM_VALUE_INSTANCE_ID:
return nir_intrinsic_load_instance_id;
case SYSTEM_VALUE_DRAW_ID:
return nir_intrinsic_load_draw_id;
case SYSTEM_VALUE_BASE_INSTANCE:
return nir_intrinsic_load_base_instance;
case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
return nir_intrinsic_load_vertex_id_zero_base;
case SYSTEM_VALUE_BASE_VERTEX:
return nir_intrinsic_load_base_vertex;
case SYSTEM_VALUE_INVOCATION_ID:
return nir_intrinsic_load_invocation_id;
case SYSTEM_VALUE_FRAG_COORD:
return nir_intrinsic_load_frag_coord;
case SYSTEM_VALUE_FRONT_FACE:
return nir_intrinsic_load_front_face;
case SYSTEM_VALUE_SAMPLE_ID:
return nir_intrinsic_load_sample_id;
case SYSTEM_VALUE_SAMPLE_POS:
return nir_intrinsic_load_sample_pos;
case SYSTEM_VALUE_SAMPLE_MASK_IN:
return nir_intrinsic_load_sample_mask_in;
case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
return nir_intrinsic_load_local_invocation_id;
case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
return nir_intrinsic_load_local_invocation_index;
case SYSTEM_VALUE_WORK_GROUP_ID:
return nir_intrinsic_load_work_group_id;
case SYSTEM_VALUE_NUM_WORK_GROUPS:
return nir_intrinsic_load_num_work_groups;
case SYSTEM_VALUE_PRIMITIVE_ID:
return nir_intrinsic_load_primitive_id;
case SYSTEM_VALUE_TESS_COORD:
return nir_intrinsic_load_tess_coord;
case SYSTEM_VALUE_TESS_LEVEL_OUTER:
return nir_intrinsic_load_tess_level_outer;
case SYSTEM_VALUE_TESS_LEVEL_INNER:
return nir_intrinsic_load_tess_level_inner;
case SYSTEM_VALUE_VERTICES_IN:
return nir_intrinsic_load_patch_vertices_in;
case SYSTEM_VALUE_HELPER_INVOCATION:
return nir_intrinsic_load_helper_invocation;
case SYSTEM_VALUE_VIEW_INDEX:
return nir_intrinsic_load_view_index;
case SYSTEM_VALUE_SUBGROUP_SIZE:
return nir_intrinsic_load_subgroup_size;
case SYSTEM_VALUE_SUBGROUP_INVOCATION:
return nir_intrinsic_load_subgroup_invocation;
case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
return nir_intrinsic_load_subgroup_eq_mask;
case SYSTEM_VALUE_SUBGROUP_GE_MASK:
return nir_intrinsic_load_subgroup_ge_mask;
case SYSTEM_VALUE_SUBGROUP_GT_MASK:
return nir_intrinsic_load_subgroup_gt_mask;
case SYSTEM_VALUE_SUBGROUP_LE_MASK:
return nir_intrinsic_load_subgroup_le_mask;
case SYSTEM_VALUE_SUBGROUP_LT_MASK:
return nir_intrinsic_load_subgroup_lt_mask;
default:
unreachable("system value does not directly correspond to intrinsic");
}
}
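/* A typical consumer maps a system value to its load intrinsic and then
 * builds that intrinsic.  A hedged sketch, assuming the nir_builder helpers
 * from nir_builder.h are available ("example_build_sysval_load" is
 * hypothetical):
 */
static nir_ssa_def *
example_build_sysval_load(nir_builder *b, gl_system_value val,
                          unsigned num_components, unsigned bit_size)
{
   nir_intrinsic_op op = nir_intrinsic_from_system_value(val);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
   load->num_components = num_components;
   nir_ssa_dest_init(&load->instr, &load->dest, num_components, bit_size,
                     NULL);
   nir_builder_instr_insert(b, &load->instr);
   return &load->dest.ssa;
}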
gl_system_value
nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
{
switch (intrin) {
case nir_intrinsic_load_vertex_id:
return SYSTEM_VALUE_VERTEX_ID;
case nir_intrinsic_load_instance_id:
return SYSTEM_VALUE_INSTANCE_ID;
case nir_intrinsic_load_draw_id:
return SYSTEM_VALUE_DRAW_ID;
case nir_intrinsic_load_base_instance:
return SYSTEM_VALUE_BASE_INSTANCE;
case nir_intrinsic_load_vertex_id_zero_base:
return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
case nir_intrinsic_load_base_vertex:
return SYSTEM_VALUE_BASE_VERTEX;
case nir_intrinsic_load_invocation_id:
return SYSTEM_VALUE_INVOCATION_ID;
case nir_intrinsic_load_frag_coord:
return SYSTEM_VALUE_FRAG_COORD;
case nir_intrinsic_load_front_face:
return SYSTEM_VALUE_FRONT_FACE;
case nir_intrinsic_load_sample_id:
return SYSTEM_VALUE_SAMPLE_ID;
case nir_intrinsic_load_sample_pos:
return SYSTEM_VALUE_SAMPLE_POS;
case nir_intrinsic_load_sample_mask_in:
return SYSTEM_VALUE_SAMPLE_MASK_IN;
case nir_intrinsic_load_local_invocation_id:
return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
case nir_intrinsic_load_local_invocation_index:
return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
case nir_intrinsic_load_num_work_groups:
return SYSTEM_VALUE_NUM_WORK_GROUPS;
case nir_intrinsic_load_work_group_id:
return SYSTEM_VALUE_WORK_GROUP_ID;
case nir_intrinsic_load_primitive_id:
return SYSTEM_VALUE_PRIMITIVE_ID;
case nir_intrinsic_load_tess_coord:
return SYSTEM_VALUE_TESS_COORD;
case nir_intrinsic_load_tess_level_outer:
return SYSTEM_VALUE_TESS_LEVEL_OUTER;
case nir_intrinsic_load_tess_level_inner:
return SYSTEM_VALUE_TESS_LEVEL_INNER;
case nir_intrinsic_load_patch_vertices_in:
return SYSTEM_VALUE_VERTICES_IN;
case nir_intrinsic_load_helper_invocation:
return SYSTEM_VALUE_HELPER_INVOCATION;
case nir_intrinsic_load_view_index:
return SYSTEM_VALUE_VIEW_INDEX;
case nir_intrinsic_load_subgroup_size:
return SYSTEM_VALUE_SUBGROUP_SIZE;
case nir_intrinsic_load_subgroup_invocation:
return SYSTEM_VALUE_SUBGROUP_INVOCATION;
case nir_intrinsic_load_subgroup_eq_mask:
return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
case nir_intrinsic_load_subgroup_ge_mask:
return SYSTEM_VALUE_SUBGROUP_GE_MASK;
case nir_intrinsic_load_subgroup_gt_mask:
return SYSTEM_VALUE_SUBGROUP_GT_MASK;
case nir_intrinsic_load_subgroup_le_mask:
return SYSTEM_VALUE_SUBGROUP_LE_MASK;
case nir_intrinsic_load_subgroup_lt_mask:
return SYSTEM_VALUE_SUBGROUP_LT_MASK;
default:
unreachable("intrinsic doesn't produce a system value");
}
}
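/* This function and nir_intrinsic_from_system_value() are mutual inverses
 * for every value they both cover, which makes conversions easy to
 * sanity-check.  A minimal sketch ("example_checked_conversion" is
 * hypothetical):
 */
static nir_intrinsic_op
example_checked_conversion(gl_system_value val)
{
   nir_intrinsic_op op = nir_intrinsic_from_system_value(val);
   assert(nir_system_value_from_intrinsic(op) == val);
   return op;
}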