/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "nir.h"
#include "nir_control_flow_private.h"
#include "util/half_float.h"
#include <limits.h>
#include <assert.h>
#include <math.h>
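
/* A minimal construction sketch (illustrative only; `options' stands in
 * for a real, driver-provided nir_shader_compiler_options):
 *
 *    nir_shader *s = nir_shader_create(NULL, MESA_SHADER_FRAGMENT,
 *                                      options, NULL);
 *    nir_function *fn = nir_function_create(s, "main");
 *    nir_function_impl *impl = nir_function_impl_create(fn);
 */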
nir_shader *
nir_shader_create(void *mem_ctx,
                  gl_shader_stage stage,
                  const nir_shader_compiler_options *options,
                  shader_info *si)
{
   nir_shader *shader = rzalloc(mem_ctx, nir_shader);

   exec_list_make_empty(&shader->uniforms);
   exec_list_make_empty(&shader->inputs);
   exec_list_make_empty(&shader->outputs);
   exec_list_make_empty(&shader->shared);

   shader->options = options;

   if (si) {
      assert(si->stage == stage);
      shader->info = *si;
   } else {
      shader->info.stage = stage;
   }

   exec_list_make_empty(&shader->functions);
   exec_list_make_empty(&shader->registers);
   exec_list_make_empty(&shader->globals);
   exec_list_make_empty(&shader->system_values);
   shader->reg_alloc = 0;

   shader->num_inputs = 0;
   shader->num_outputs = 0;
   shader->num_uniforms = 0;
   shader->num_shared = 0;

   return shader;
}

static nir_register *
reg_create(void *mem_ctx, struct exec_list *list)
{
   nir_register *reg = ralloc(mem_ctx, nir_register);

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   reg->num_components = 0;
   reg->bit_size = 32;
   reg->num_array_elems = 0;
   reg->is_packed = false;
   reg->name = NULL;

   exec_list_push_tail(list, &reg->node);

   return reg;
}

nir_register *
nir_global_reg_create(nir_shader *shader)
{
   nir_register *reg = reg_create(shader, &shader->registers);
   reg->index = shader->reg_alloc++;
   reg->is_global = true;

   return reg;
}

nir_register *
nir_local_reg_create(nir_function_impl *impl)
{
   nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
   reg->index = impl->reg_alloc++;
   reg->is_global = false;

   return reg;
}

void
nir_reg_remove(nir_register *reg)
{
   exec_node_remove(&reg->node);
}

void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
{
   switch (var->data.mode) {
   case nir_var_all:
      assert(!"invalid mode");
      break;

   case nir_var_local:
      assert(!"nir_shader_add_variable cannot be used for local variables");
      break;

   case nir_var_global:
      exec_list_push_tail(&shader->globals, &var->node);
      break;

   case nir_var_shader_in:
      exec_list_push_tail(&shader->inputs, &var->node);
      break;

   case nir_var_shader_out:
      exec_list_push_tail(&shader->outputs, &var->node);
      break;

   case nir_var_uniform:
   case nir_var_shader_storage:
      exec_list_push_tail(&shader->uniforms, &var->node);
      break;

   case nir_var_shared:
      assert(shader->info.stage == MESA_SHADER_COMPUTE);
      exec_list_push_tail(&shader->shared, &var->node);
      break;

   case nir_var_system_value:
      exec_list_push_tail(&shader->system_values, &var->node);
      break;
   }
}
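
/* For example (a sketch; glsl_vec4_type() comes from glsl_types.h):
 *
 *    nir_variable *color =
 *       nir_variable_create(shader, nir_var_shader_out,
 *                           glsl_vec4_type(), "out_color");
 */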
nir_variable *
nir_variable_create(nir_shader *shader, nir_variable_mode mode,
                    const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = mode;

   if ((mode == nir_var_shader_in &&
        shader->info.stage != MESA_SHADER_VERTEX) ||
       (mode == nir_var_shader_out &&
        shader->info.stage != MESA_SHADER_FRAGMENT))
      var->data.interpolation = INTERP_MODE_SMOOTH;

   if (mode == nir_var_shader_in || mode == nir_var_uniform)
      var->data.read_only = true;

   nir_shader_add_variable(shader, var);

   return var;
}

nir_variable *
nir_local_variable_create(nir_function_impl *impl,
                          const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(impl->function->shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = nir_var_local;

   nir_function_impl_add_variable(impl, var);

   return var;
}

nir_function *
nir_function_create(nir_shader *shader, const char *name)
{
   nir_function *func = ralloc(shader, nir_function);

   exec_list_push_tail(&shader->functions, &func->node);

   func->name = ralloc_strdup(func, name);
   func->shader = shader;
   func->num_params = 0;
   func->params = NULL;
   func->impl = NULL;

   return func;
}

/* NOTE: if the instruction you are copying a src to is already added
 * to the IR, use nir_instr_rewrite_src() instead.
 */
void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
{
   dest->is_ssa = src->is_ssa;
   if (src->is_ssa) {
      dest->ssa = src->ssa;
   } else {
      dest->reg.base_offset = src->reg.base_offset;
      dest->reg.reg = src->reg.reg;
      if (src->reg.indirect) {
         dest->reg.indirect = ralloc(mem_ctx, nir_src);
         nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
      } else {
         dest->reg.indirect = NULL;
      }
   }
}

void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
{
   /* Copying an SSA definition makes no sense whatsoever. */
   assert(!src->is_ssa);

   dest->is_ssa = false;

   dest->reg.base_offset = src->reg.base_offset;
   dest->reg.reg = src->reg.reg;
   if (src->reg.indirect) {
      dest->reg.indirect = ralloc(instr, nir_src);
      nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
   } else {
      dest->reg.indirect = NULL;
   }
}

void
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
                 nir_alu_instr *instr)
{
   nir_src_copy(&dest->src, &src->src, &instr->instr);
   dest->abs = src->abs;
   dest->negate = src->negate;
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
      dest->swizzle[i] = src->swizzle[i];
}

void
nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
                  nir_alu_instr *instr)
{
   nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
   dest->write_mask = src->write_mask;
   dest->saturate = src->saturate;
}

static void
cf_init(nir_cf_node *node, nir_cf_node_type type)
{
   exec_node_init(&node->node);
   node->parent = NULL;
   node->type = type;
}

nir_function_impl *
nir_function_impl_create_bare(nir_shader *shader)
{
   nir_function_impl *impl = ralloc(shader, nir_function_impl);

   impl->function = NULL;

   cf_init(&impl->cf_node, nir_cf_node_function);

   exec_list_make_empty(&impl->body);
   exec_list_make_empty(&impl->registers);
   exec_list_make_empty(&impl->locals);
   impl->reg_alloc = 0;
   impl->ssa_alloc = 0;
   impl->valid_metadata = nir_metadata_none;

   /* create start & end blocks */
   nir_block *start_block = nir_block_create(shader);
   nir_block *end_block = nir_block_create(shader);
   start_block->cf_node.parent = &impl->cf_node;
   end_block->cf_node.parent = &impl->cf_node;
   impl->end_block = end_block;

   exec_list_push_tail(&impl->body, &start_block->cf_node.node);

   start_block->successors[0] = end_block;
   _mesa_set_add(end_block->predecessors, start_block);
   return impl;
}

nir_function_impl *
nir_function_impl_create(nir_function *function)
{
   assert(function->impl == NULL);

   nir_function_impl *impl = nir_function_impl_create_bare(function->shader);

   function->impl = impl;
   impl->function = function;

   return impl;
}

nir_block *
nir_block_create(nir_shader *shader)
{
   nir_block *block = rzalloc(shader, nir_block);

   cf_init(&block->cf_node, nir_cf_node_block);

   block->successors[0] = block->successors[1] = NULL;
   block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
                                          _mesa_key_pointer_equal);
   block->imm_dom = NULL;
   /* XXX maybe it would be worth it to defer allocation?  This
    * way it doesn't get allocated for shader refs that never run
    * nir_calc_dominance?  For example, state-tracker creates an
    * initial IR, clones that, runs appropriate lowering pass, passes
    * to driver which does common lowering/opt, and then stores ref
    * which is later used to do state specific lowering and further
    * opt.  Do any of the references not need dominance metadata?
    */
   block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
                                          _mesa_key_pointer_equal);

   exec_list_make_empty(&block->instr_list);

   return block;
}

static inline void
src_init(nir_src *src)
{
   src->is_ssa = false;
   src->reg.reg = NULL;
   src->reg.indirect = NULL;
   src->reg.base_offset = 0;
}

nir_if *
nir_if_create(nir_shader *shader)
{
   nir_if *if_stmt = ralloc(shader, nir_if);

   cf_init(&if_stmt->cf_node, nir_cf_node_if);
   src_init(&if_stmt->condition);

   nir_block *then = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->then_list);
   exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
   then->cf_node.parent = &if_stmt->cf_node;

   nir_block *else_stmt = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->else_list);
   exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
   else_stmt->cf_node.parent = &if_stmt->cf_node;

   return if_stmt;
}

nir_loop *
nir_loop_create(nir_shader *shader)
{
   nir_loop *loop = rzalloc(shader, nir_loop);

   cf_init(&loop->cf_node, nir_cf_node_loop);

   nir_block *body = nir_block_create(shader);
   exec_list_make_empty(&loop->body);
   exec_list_push_tail(&loop->body, &body->cf_node.node);
   body->cf_node.parent = &loop->cf_node;

   body->successors[0] = body;
   _mesa_set_add(body->predecessors, body);

   return loop;
}

static void
instr_init(nir_instr *instr, nir_instr_type type)
{
   instr->type = type;
   instr->block = NULL;
   exec_node_init(&instr->node);
}

static void
dest_init(nir_dest *dest)
{
   dest->is_ssa = false;
   dest->reg.reg = NULL;
   dest->reg.indirect = NULL;
   dest->reg.base_offset = 0;
}

static void
alu_dest_init(nir_alu_dest *dest)
{
   dest_init(&dest->dest);
   dest->saturate = false;
   dest->write_mask = 0xf;
}

static void
alu_src_init(nir_alu_src *src)
{
   src_init(&src->src);
   src->abs = src->negate = false;
   for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
      src->swizzle[i] = i;
}
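
/* Hand-rolled construction sketch (a real pass would typically use the
 * nir_builder helpers instead); `a', `b' and `cursor' are assumed to
 * already exist:
 *
 *    nir_alu_instr *fadd = nir_alu_instr_create(shader, nir_op_fadd);
 *    fadd->src[0].src = nir_src_for_ssa(a);
 *    fadd->src[1].src = nir_src_for_ssa(b);
 *    nir_ssa_dest_init(&fadd->instr, &fadd->dest.dest, 1, 32, NULL);
 *    nir_instr_insert(cursor, &fadd->instr);
 */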
nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   /* TODO: don't use rzalloc */
   nir_alu_instr *instr =
      rzalloc_size(shader,
                   sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));

   instr_init(&instr->instr, nir_instr_type_alu);
   instr->op = op;
   alu_dest_init(&instr->dest);
   for (unsigned i = 0; i < num_srcs; i++)
      alu_src_init(&instr->src[i]);

   return instr;
}

nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
{
   nir_deref_instr *instr =
      rzalloc_size(shader, sizeof(nir_deref_instr));

   instr_init(&instr->instr, nir_instr_type_deref);

   instr->deref_type = deref_type;
   if (deref_type != nir_deref_type_var)
      src_init(&instr->parent);

   if (deref_type == nir_deref_type_array)
      src_init(&instr->arr.index);

   dest_init(&instr->dest);

   return instr;
}

nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
   nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
   instr_init(&instr->instr, nir_instr_type_jump);
   instr->type = type;
   return instr;
}

nir_load_const_instr *
nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
                            unsigned bit_size)
{
   nir_load_const_instr *instr = rzalloc(shader, nir_load_const_instr);
   instr_init(&instr->instr, nir_instr_type_load_const);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);

   return instr;
}
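
/* Construction sketch (illustrative values; a real caller sizes the
 * destination to match the intrinsic being built):
 *
 *    nir_intrinsic_instr *load =
 *       nir_intrinsic_instr_create(shader, nir_intrinsic_load_uniform);
 *    load->num_components = 4;
 *    nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
 */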
nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   /* TODO: don't use rzalloc */
   nir_intrinsic_instr *instr =
      rzalloc_size(shader,
                   sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));

   instr_init(&instr->instr, nir_instr_type_intrinsic);
   instr->intrinsic = op;

   if (nir_intrinsic_infos[op].has_dest)
      dest_init(&instr->dest);

   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i]);

   return instr;
}

nir_call_instr *
nir_call_instr_create(nir_shader *shader, nir_function *callee)
{
   const unsigned num_params = callee->num_params;
   nir_call_instr *instr =
      rzalloc_size(shader, sizeof(*instr) +
                   num_params * sizeof(instr->params[0]));

   instr_init(&instr->instr, nir_instr_type_call);
   instr->callee = callee;
   instr->num_params = num_params;
   for (unsigned i = 0; i < num_params; i++)
      src_init(&instr->params[i]);

   return instr;
}

nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
   nir_tex_instr *instr = rzalloc(shader, nir_tex_instr);
   instr_init(&instr->instr, nir_instr_type_tex);

   dest_init(&instr->dest);

   instr->num_srcs = num_srcs;
   instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i].src);

   instr->texture_index = 0;
   instr->texture_array_size = 0;
   instr->sampler_index = 0;

   return instr;
}
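
/* Appends one source to a texture instruction, reallocating its source
 * array.  Example use (a sketch; `bias' is an existing nir_ssa_def *):
 *
 *    nir_tex_instr_add_src(tex, nir_tex_src_bias, nir_src_for_ssa(bias));
 */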
void
nir_tex_instr_add_src(nir_tex_instr *tex,
                      nir_tex_src_type src_type,
                      nir_src src)
{
   nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
                                         tex->num_srcs + 1);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      new_srcs[i].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &new_srcs[i].src,
                         &tex->src[i].src);
   }

   ralloc_free(tex->src);
   tex->src = new_srcs;

   tex->src[tex->num_srcs].src_type = src_type;
   nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
   tex->num_srcs++;
}

void
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
{
   assert(src_idx < tex->num_srcs);

   /* First rewrite the source to NIR_SRC_INIT */
   nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT);

   /* Now, move all of the other sources down */
   for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
      tex->src[i-1].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
   }
   tex->num_srcs--;
}

nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
{
   nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
   instr_init(&instr->instr, nir_instr_type_phi);

   dest_init(&instr->dest);
   exec_list_make_empty(&instr->srcs);
   return instr;
}

nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
{
   nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
   instr_init(&instr->instr, nir_instr_type_parallel_copy);

   exec_list_make_empty(&instr->entries);

   return instr;
}

nir_ssa_undef_instr *
nir_ssa_undef_instr_create(nir_shader *shader,
                           unsigned num_components,
                           unsigned bit_size)
{
   nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
   instr_init(&instr->instr, nir_instr_type_ssa_undef);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);

   return instr;
}

static nir_const_value
const_value_float(double d, unsigned bit_size)
{
   nir_const_value v;
   switch (bit_size) {
   case 16: v.u16[0] = _mesa_float_to_half(d);  break;
   case 32: v.f32[0] = d;                       break;
   case 64: v.f64[0] = d;                       break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

static nir_const_value
const_value_int(int64_t i, unsigned bit_size)
{
   nir_const_value v;
   switch (bit_size) {
   case 8:  v.i8[0]  = i;  break;
   case 16: v.i16[0] = i;  break;
   case 32: v.i32[0] = i;  break;
   case 64: v.i64[0] = i;  break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}
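
/* Returns the identity element for a reduction over the given binop:
 * e.g. 0 for iadd/fadd, 1 for imul/fmul, +INFINITY for fmin and ~0 for
 * iand, so that folding the identity into the reduction is a no-op.
 */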
nir_const_value
nir_alu_binop_identity(nir_op binop, unsigned bit_size)
{
   const int64_t max_int = (1ull << (bit_size - 1)) - 1;
   const int64_t min_int = -max_int - 1;
   switch (binop) {
   case nir_op_iadd:
      return const_value_int(0, bit_size);
   case nir_op_fadd:
      return const_value_float(0, bit_size);
   case nir_op_imul:
      return const_value_int(1, bit_size);
   case nir_op_fmul:
      return const_value_float(1, bit_size);
   case nir_op_imin:
      return const_value_int(max_int, bit_size);
   case nir_op_umin:
      return const_value_int(~0ull, bit_size);
   case nir_op_fmin:
      return const_value_float(INFINITY, bit_size);
   case nir_op_imax:
      return const_value_int(min_int, bit_size);
   case nir_op_umax:
      return const_value_int(0, bit_size);
   case nir_op_fmax:
      return const_value_float(-INFINITY, bit_size);
   case nir_op_iand:
      return const_value_int(~0ull, bit_size);
   case nir_op_ior:
      return const_value_int(0, bit_size);
   case nir_op_ixor:
      return const_value_int(0, bit_size);
   default:
      unreachable("Invalid reduction operation");
   }
}

nir_function_impl *
nir_cf_node_get_function(nir_cf_node *node)
{
   while (node->type != nir_cf_node_function) {
      node = node->parent;
   }

   return nir_cf_node_as_function(node);
}

/* Reduces a cursor by trying to convert everything to after and trying to
 * go up to block granularity when possible.
 */
static nir_cursor
reduce_cursor(nir_cursor cursor)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      assert(nir_cf_node_prev(&cursor.block->cf_node) == NULL ||
             nir_cf_node_prev(&cursor.block->cf_node)->type != nir_cf_node_block);
      if (exec_list_is_empty(&cursor.block->instr_list)) {
         /* Empty block.  After is as good as before. */
         cursor.option = nir_cursor_after_block;
      }
      return cursor;

   case nir_cursor_after_block:
      return cursor;

   case nir_cursor_before_instr: {
      nir_instr *prev_instr = nir_instr_prev(cursor.instr);
      if (prev_instr) {
         /* Before this instruction is after the previous */
         cursor.instr = prev_instr;
         cursor.option = nir_cursor_after_instr;
      } else {
         /* No previous instruction.  Switch to before block */
         cursor.block = cursor.instr->block;
         cursor.option = nir_cursor_before_block;
      }
      return reduce_cursor(cursor);
   }

   case nir_cursor_after_instr:
      if (nir_instr_next(cursor.instr) == NULL) {
         /* This is the last instruction, switch to after block */
         cursor.option = nir_cursor_after_block;
         cursor.block = cursor.instr->block;
      }
      return cursor;

   default:
      unreachable("Invalid cursor option");
   }
}
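
/* Two reduced cursors that denote the same insertion point should have
 * the same representation: e.g. a before-instr cursor on the first
 * instruction of a block reduces to a cursor on the block itself.  That
 * is what lets the equality check below compare only the pointer and
 * the option.
 */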
bool
nir_cursors_equal(nir_cursor a, nir_cursor b)
{
   /* Reduced cursors should be unique */
   a = reduce_cursor(a);
   b = reduce_cursor(b);

   return a.block == b.block && a.option == b.option;
}

static bool
add_use_cb(nir_src *src, void *state)
{
   nir_instr *instr = state;

   src->parent_instr = instr;
   list_addtail(&src->use_link,
                src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses);

   return true;
}

static bool
add_ssa_def_cb(nir_ssa_def *def, void *state)
{
   nir_instr *instr = state;

   if (instr->block && def->index == UINT_MAX) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
   }

   return true;
}

static bool
add_reg_def_cb(nir_dest *dest, void *state)
{
   nir_instr *instr = state;

   if (!dest->is_ssa) {
      dest->reg.parent_instr = instr;
      list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
   }

   return true;
}

static void
add_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, add_use_cb, instr);
   nir_foreach_dest(instr, add_reg_def_cb, instr);
   nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
}
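
/* Inserts an instruction at the given cursor and hooks up its use/def
 * lists.  Example (a sketch; `other_instr' is an existing nir_instr *):
 *
 *    nir_instr_insert(nir_before_instr(other_instr), &new_alu->instr);
 */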
void
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      /* Only allow inserting jumps into empty blocks. */
      if (instr->type == nir_instr_type_jump)
         assert(exec_list_is_empty(&cursor.block->instr_list));

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_head(&cursor.block->instr_list, &instr->node);
      break;
   case nir_cursor_after_block: {
      /* Inserting instructions after a jump is illegal. */
      nir_instr *last = nir_block_last_instr(cursor.block);
      assert(last == NULL || last->type != nir_instr_type_jump);
      (void) last;

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_tail(&cursor.block->instr_list, &instr->node);
      break;
   }
   case nir_cursor_before_instr:
      assert(instr->type != nir_instr_type_jump);
      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_node_before(&cursor.instr->node, &instr->node);
      break;
   case nir_cursor_after_instr:
      /* Inserting instructions after a jump is illegal. */
      assert(cursor.instr->type != nir_instr_type_jump);

      /* Only allow inserting jumps at the end of the block. */
      if (instr->type == nir_instr_type_jump)
         assert(cursor.instr == nir_block_last_instr(cursor.instr->block));

      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_after(&cursor.instr->node, &instr->node);
      break;
   }

   if (instr->type == nir_instr_type_jump)
      nir_handle_add_jump(instr->block);
}

static bool
src_is_valid(const nir_src *src)
{
   return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
}

static bool
remove_use_cb(nir_src *src, void *state)
{
   (void) state;

   if (src_is_valid(src))
      list_del(&src->use_link);

   return true;
}

static bool
remove_def_cb(nir_dest *dest, void *state)
{
   (void) state;

   if (!dest->is_ssa)
      list_del(&dest->reg.def_link);

   return true;
}

static void
remove_defs_uses(nir_instr *instr)
{
   nir_foreach_dest(instr, remove_def_cb, instr);
   nir_foreach_src(instr, remove_use_cb, instr);
}
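
/* Unlinks the instruction from its block and drops its use/def links;
 * removing a jump also triggers the control-flow fixup in
 * nir_handle_remove_jump().
 */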
void nir_instr_remove_v(nir_instr *instr)
{
   remove_defs_uses(instr);
   exec_node_remove(&instr->node);

   if (instr->type == nir_instr_type_jump) {
      nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
      nir_handle_remove_jump(instr->block, jump_instr->type);
   }
}

/*@}*/

void
nir_index_local_regs(nir_function_impl *impl)
{
   unsigned index = 0;
   foreach_list_typed(nir_register, reg, node, &impl->registers) {
      reg->index = index++;
   }
   impl->reg_alloc = index;
}

void
nir_index_global_regs(nir_shader *shader)
{
   unsigned index = 0;
   foreach_list_typed(nir_register, reg, node, &shader->registers) {
      reg->index = index++;
   }
   shader->reg_alloc = index;
}

static bool
visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest.dest, state);
}

static bool
visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
                     void *state)
{
   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
      return cb(&instr->dest, state);

   return true;
}

static bool
visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb,
                   void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_parallel_copy_dest(nir_parallel_copy_instr *instr,
                         nir_foreach_dest_cb cb, void *state)
{
   nir_foreach_parallel_copy_entry(entry, instr) {
      if (!cb(&entry->dest, state))
         return false;
   }

   return true;
}
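
/* Invokes the callback on every destination of the instruction,
 * stopping early (and returning false) as soon as the callback does.
 */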
bool
nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
   case nir_instr_type_deref:
      return visit_deref_dest(nir_instr_as_deref(instr), cb, state);
   case nir_instr_type_intrinsic:
      return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
   case nir_instr_type_tex:
      return visit_texture_dest(nir_instr_as_tex(instr), cb, state);
   case nir_instr_type_phi:
      return visit_phi_dest(nir_instr_as_phi(instr), cb, state);
   case nir_instr_type_parallel_copy:
      return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr),
                                      cb, state);

   case nir_instr_type_load_const:
   case nir_instr_type_ssa_undef:
   case nir_instr_type_call:
   case nir_instr_type_jump:
      break;

   default:
      unreachable("Invalid instruction type");
      break;
   }

   return true;
}

struct foreach_ssa_def_state {
   nir_foreach_ssa_def_cb cb;
   void *client_state;
};

static inline bool
nir_ssa_def_visitor(nir_dest *dest, void *void_state)
{
   struct foreach_ssa_def_state *state = void_state;

   if (dest->is_ssa)
      return state->cb(&dest->ssa, state->client_state);
   else
      return true;
}
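
/* Like nir_foreach_dest, but visits SSA definitions: destinations are
 * filtered through nir_ssa_def_visitor above, while load_const and
 * ssa_undef (which have a bare nir_ssa_def rather than a nir_dest) are
 * handled directly.
 */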
bool
nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
   case nir_instr_type_deref:
   case nir_instr_type_tex:
   case nir_instr_type_intrinsic:
   case nir_instr_type_phi:
   case nir_instr_type_parallel_copy: {
      struct foreach_ssa_def_state foreach_state = {cb, state};
      return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
   }

   case nir_instr_type_load_const:
      return cb(&nir_instr_as_load_const(instr)->def, state);
   case nir_instr_type_ssa_undef:
      return cb(&nir_instr_as_ssa_undef(instr)->def, state);
   case nir_instr_type_call:
   case nir_instr_type_jump:
      return true;
   default:
      unreachable("Invalid instruction type");
   }
}

static bool
visit_src(nir_src *src, nir_foreach_src_cb cb, void *state)
{
   if (!cb(src, state))
      return false;
   if (!src->is_ssa && src->reg.indirect)
      return cb(src->reg.indirect, state);
   return true;
}

static bool
visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
      if (!visit_src(&instr->src[i].src, cb, state))
         return false;

   return true;
}

static bool
visit_deref_instr_src(nir_deref_instr *instr,
                      nir_foreach_src_cb cb, void *state)
{
   if (instr->deref_type != nir_deref_type_var) {
      if (!visit_src(&instr->parent, cb, state))
         return false;
   }

   if (instr->deref_type == nir_deref_type_array) {
      if (!visit_src(&instr->arr.index, cb, state))
         return false;
   }

   return true;
}

static bool
visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      if (!visit_src(&instr->src[i].src, cb, state))
         return false;
   }

   return true;
}

static bool
visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
                    void *state)
{
   unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
   for (unsigned i = 0; i < num_srcs; i++) {
      if (!visit_src(&instr->src[i], cb, state))
         return false;
   }

   return true;
}

static bool
visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < instr->num_params; i++) {
      if (!visit_src(&instr->params[i], cb, state))
         return false;
   }

   return true;
}

static bool
visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state)
{
   nir_foreach_phi_src(src, instr) {
      if (!visit_src(&src->src, cb, state))
         return false;
   }

   return true;
}

static bool
visit_parallel_copy_src(nir_parallel_copy_instr *instr,
                        nir_foreach_src_cb cb, void *state)
{
   nir_foreach_parallel_copy_entry(entry, instr) {
      if (!visit_src(&entry->src, cb, state))
         return false;
   }

   return true;
}

typedef struct {
   void *state;
   nir_foreach_src_cb cb;
} visit_dest_indirect_state;

static bool
visit_dest_indirect(nir_dest *dest, void *_state)
{
   visit_dest_indirect_state *state = (visit_dest_indirect_state *) _state;

   if (!dest->is_ssa && dest->reg.indirect)
      return state->cb(dest->reg.indirect, state->state);

   return true;
}
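
/* Invokes the callback on every source of the instruction, including
 * register indirects in both sources and destinations, and returns
 * false as soon as the callback does.
 */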
bool
nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
         return false;
      break;
   case nir_instr_type_deref:
      if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state))
         return false;
      break;
   case nir_instr_type_intrinsic:
      if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
         return false;
      break;
   case nir_instr_type_tex:
      if (!visit_tex_src(nir_instr_as_tex(instr), cb, state))
         return false;
      break;
   case nir_instr_type_call:
      if (!visit_call_src(nir_instr_as_call(instr), cb, state))
         return false;
      break;
   case nir_instr_type_load_const:
      /* Constant load instructions have no regular sources */
      break;
   case nir_instr_type_phi:
      if (!visit_phi_src(nir_instr_as_phi(instr), cb, state))
         return false;
      break;
   case nir_instr_type_parallel_copy:
      if (!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr),
                                   cb, state))
         return false;
      break;
   case nir_instr_type_jump:
   case nir_instr_type_ssa_undef:
      return true;

   default:
      unreachable("Invalid instruction type");
      break;
   }

   visit_dest_indirect_state dest_state;
   dest_state.state = state;
   dest_state.cb = cb;
   return nir_foreach_dest(instr, visit_dest_indirect, &dest_state);
}
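
/* Returns the constant behind `src' if it is an SSA value produced by a
 * load_const instruction, or NULL otherwise.  Typical use (a sketch):
 *
 *    nir_const_value *cv = nir_src_as_const_value(alu->src[0].src);
 *    if (cv && cv->u32[0] == 0)
 *       ...constant-fold...
 */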
nir_const_value *
nir_src_as_const_value(nir_src src)
{
   if (!src.is_ssa)
      return NULL;

   if (src.ssa->parent_instr->type != nir_instr_type_load_const)
      return NULL;

   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   return &load->value;
}

/**
 * Returns true if the source is known to be dynamically uniform. Otherwise
 * it returns false, which means the source may or may not be dynamically
 * uniform but it can't be determined.
 */
bool
nir_src_is_dynamically_uniform(nir_src src)
{
   if (!src.is_ssa)
      return false;

   /* Constants are trivially dynamically uniform */
   if (src.ssa->parent_instr->type == nir_instr_type_load_const)
      return true;

   /* As are uniform variables */
   if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);

      if (intr->intrinsic == nir_intrinsic_load_uniform)
         return true;
   }

   /* XXX: this could have many more tests, such as when a sampler function is
    * called with dynamically uniform arguments.
    */
   return false;
}
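
/*
 * Illustrative sketch: a backend can use the helper above to detect
 * non-divergent control flow, e.g. for an if whose condition is the same in
 * every invocation:
 *
 *    if (nir_src_is_dynamically_uniform(if_stmt->condition)) {
 *       // all invocations take the same branch; no divergence handling
 *    }
 */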

static void
src_remove_all_uses(nir_src *src)
{
   for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      list_del(&src->use_link);
   }
}

static void
src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
{
   for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      if (parent_instr) {
         src->parent_instr = parent_instr;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->uses);
      } else {
         assert(parent_if);
         src->parent_if = parent_if;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->if_uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->if_uses);
      }
   }
}
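
/*
 * Both helpers above exploit the list node embedded in every nir_src, so
 * linking and unlinking a use is constant-time pointer surgery.  Sketch of
 * walking the resulting use list of an SSA def:
 *
 *    nir_foreach_use(use_src, def) {
 *       nir_instr *user = use_src->parent_instr;
 *       // each (instruction, source slot) pair appears exactly once
 *    }
 */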

void
nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
{
   assert(!src_is_valid(src) || src->parent_instr == instr);

   src_remove_all_uses(src);
   *src = new_src;
   src_add_all_uses(src, instr, NULL);
}
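
/*
 * Illustrative sketch: an optimization pass that proves alu->src[0] can be
 * replaced by another SSA value 'repl' (both hypothetical) would write:
 *
 *    nir_instr_rewrite_src(&alu->instr, &alu->src[0].src,
 *                          nir_src_for_ssa(repl));
 *
 * which unlinks the old use and links the new one in a single call.
 */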

void
nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
{
   assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);

   src_remove_all_uses(dest);
   src_remove_all_uses(src);
   *dest = *src;
   *src = NIR_SRC_INIT;
   src_add_all_uses(dest, dest_instr, NULL);
}
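
/*
 * After nir_instr_move_src() returns, 'src' is reset to NIR_SRC_INIT and no
 * longer appears on any use list; only 'dest' refers to the moved value.
 */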

void
nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
{
   nir_src *src = &if_stmt->condition;
   assert(!src_is_valid(src) || src->parent_if == if_stmt);

   src_remove_all_uses(src);
   *src = new_src;
   src_add_all_uses(src, NULL, if_stmt);
}

void
nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
{
   if (dest->is_ssa) {
      /* We can only overwrite an SSA destination if it has no uses. */
      assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses));
   } else {
      list_del(&dest->reg.def_link);
      if (dest->reg.indirect)
         src_remove_all_uses(dest->reg.indirect);
   }

   /* We can't re-write with an SSA def */
   assert(!new_dest.is_ssa);

   nir_dest_copy(dest, &new_dest, instr);

   dest->reg.parent_instr = instr;
   list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);

   if (dest->reg.indirect)
      src_add_all_uses(dest->reg.indirect, instr, NULL);
}
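
/*
 * Note that the asserts above restrict this helper to register destinations:
 * an SSA destination is defined exactly once, so it may only be overwritten
 * while it still has no uses, and the replacement itself must not be SSA.
 */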

/* note: does *not* take ownership of 'name' */
void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                 unsigned num_components,
                 unsigned bit_size, const char *name)
{
   def->name = ralloc_strdup(instr, name);
   def->parent_instr = instr;
   list_inithead(&def->uses);
   list_inithead(&def->if_uses);
   def->num_components = num_components;
   def->bit_size = bit_size;

   if (instr->block) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
   } else {
      def->index = UINT_MAX;
   }
}

/* note: does *not* take ownership of 'name' */
void
nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                  unsigned num_components, unsigned bit_size,
                  const char *name)
{
   dest->is_ssa = true;
   nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size, name);
}
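
/*
 * Illustrative construction sketch: passes typically initialize the
 * destination right after creating an instruction and before inserting it
 * ('shader' is a hypothetical nir_shader):
 *
 *    nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov);
 *    nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, "copy");
 *
 * Because the instruction has no block yet, the def is initialized with
 * index UINT_MAX (see nir_ssa_def_init() above).
 */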

void
nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
{
   assert(!new_src.is_ssa || def != new_src.ssa);

   nir_foreach_use_safe(use_src, def)
      nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);

   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition(use_src->parent_if, new_src);
}
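
/*
 * Illustrative sketch: the classic replace-and-delete pattern used by many
 * passes, assuming 'old_alu' is a hypothetical instruction made redundant by
 * 'new_def':
 *
 *    nir_ssa_def_rewrite_uses(&old_alu->dest.dest.ssa,
 *                             nir_src_for_ssa(new_def));
 *    nir_instr_remove(&old_alu->instr);
 */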

static bool
is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
{
   assert(start->block == end->block);

   if (between->block != start->block)
      return false;

   /* Search backwards looking for "between" */
   while (start != end) {
      if (between == end)
         return true;

      end = nir_instr_prev(end);
      assert(end);
   }

   return false;
}

/* Replaces all uses of the given SSA def with the given source but only if
 * the use comes after the after_me instruction.  This can be useful if you
 * are emitting code to fix up the result of some instruction: you can freely
 * use the result in that code and then call rewrite_uses_after and pass the
 * last fixup instruction as after_me and it will replace all of the uses you
 * want without touching the fixup code.
 *
 * This function assumes that after_me is in the same block as
 * def->parent_instr and that after_me comes after def->parent_instr.
 */
void
nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
                               nir_instr *after_me)
{
   assert(!new_src.is_ssa || def != new_src.ssa);

   nir_foreach_use_safe(use_src, def) {
      assert(use_src->parent_instr != def->parent_instr);
      /* Since def already dominates all of its uses, the only way a use can
       * not be dominated by after_me is if it is between def and after_me in
       * the instruction list.
       */
      if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
         nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
   }

   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition(use_src->parent_if, new_src);
}
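
/*
 * Illustrative fixup sketch: emit correction code right after 'orig', then
 * redirect every later use to the corrected value ('b', 'orig', and
 * 'emit_fixup' are hypothetical; emit_fixup returns the new def):
 *
 *    nir_ssa_def *fixed = emit_fixup(b, &orig->dest.ssa);
 *    nir_ssa_def_rewrite_uses_after(&orig->dest.ssa,
 *                                   nir_src_for_ssa(fixed),
 *                                   fixed->parent_instr);
 *
 * The fixup code itself keeps using the original value.
 */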

nir_component_mask_t
nir_ssa_def_components_read(const nir_ssa_def *def)
{
   nir_component_mask_t read_mask = 0;
   nir_foreach_use(use, def) {
      if (use->parent_instr->type == nir_instr_type_alu) {
         nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
         nir_alu_src *alu_src = exec_node_data(nir_alu_src, use, src);
         int src_idx = alu_src - &alu->src[0];
         assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);

         for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
            if (!nir_alu_instr_channel_used(alu, src_idx, c))
               continue;

            read_mask |= (1 << alu_src->swizzle[c]);
         }
      } else {
         return (1 << def->num_components) - 1;
      }
   }

   if (!list_empty(&def->if_uses))
      read_mask |= 1;

   return read_mask;
}
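
/*
 * Illustrative sketch: a shrinking pass can use the mask above to find dead
 * channels of a vector def:
 *
 *    nir_component_mask_t mask = nir_ssa_def_components_read(def);
 *    if (!(mask & (1 << 3))) {
 *       // channel w is never read; the def could be narrowed to xyz
 *    }
 */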

nir_block *
nir_block_cf_tree_next(nir_block *block)
{
   if (block == NULL) {
      /* nir_foreach_block_safe() will call this function on a NULL block
       * after the last iteration, but it won't use the result so just return
       * NULL here.
       */
      return NULL;
   }

   nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
   if (cf_next)
      return nir_cf_node_cf_tree_first(cf_next);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the end of the if? Go to the beginning of the else */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_last_then_block(if_stmt))
         return nir_if_first_else_block(if_stmt);

      assert(block == nir_if_last_else_block(if_stmt));
      /* fall through */
   }

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_next(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}
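
/*
 * nir_block_cf_tree_next() is the stateless "next" operation behind
 * nir_foreach_block(); whole-function iteration reduces to a plain for loop
 * (sketch):
 *
 *    for (nir_block *block = nir_start_block(impl); block != NULL;
 *         block = nir_block_cf_tree_next(block)) {
 *       // visit blocks in source order
 *    }
 */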

nir_block *
nir_block_cf_tree_prev(nir_block *block)
{
   if (block == NULL) {
      /* do this for consistency with nir_block_cf_tree_next() */
      return NULL;
   }

   nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
   if (cf_prev)
      return nir_cf_node_cf_tree_last(cf_prev);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the beginning of the else? Go to the end of the if */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_first_else_block(if_stmt))
         return nir_if_last_then_block(if_stmt);

      assert(block == nir_if_first_then_block(if_stmt));
      /* fall through */
   }

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_prev(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}
|
|
|
|
|
nir: rewrite nir_foreach_block and friends
Previously, these were functions which took a callback. This meant that
the per-block code had to be in a separate function, and all the data
that you wanted to pass in had to be a single void *. They walked the
control flow tree recursively, doing a depth-first search, and called
the callback in a preorder, matching the order of the original source
code. But since each node in the control flow tree has a pointer to its
parent, we can implement a "get-next" and "get-previous" method that
does the same thing that the recursive function did with no state at
all. This lets us rewrite nir_foreach_block() as a simple for loop,
which lets us greatly simplify its users in some cases. This does
require us to rewrite every user, although the transformation from the
old nir_foreach_block() to the new nir_foreach_block() is mostly
trivial.
One subtlety, though, is that the new nir_foreach_block() won't handle
the case where the current block is deleted, which the old one could.
There's a new nir_foreach_block_safe() which implements the standard
trick for solving this. Most users don't modify control flow, though, so
they won't need it. Right now, only opt_select_peephole needs it.
The old functions are reimplemented in terms of the new macros, although
they'll go away after everything is converted.
v2: keep an implementation of the old functions around
v3 (Jason Ekstrand): A small cosmetic change and a bugfix in the loop
handling of nir_cf_node_cf_tree_last().
v4 (Jason Ekstrand): Use the _safe macro in foreach_block_reverse_call
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2016-04-08 07:11:44 +01:00
|
|
|

nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_start_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_first_then_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_first_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_impl_last_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_last_else_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_last_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_block_cf_tree_next(nir_cf_node_as_block(node));
   else if (node->type == nir_cf_node_function)
      return NULL;
   else
      return nir_cf_node_as_block(nir_cf_node_next(node));
}
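
/* For reference (paraphrased sketch; see nir.h for the authoritative
 * definitions): the nir_foreach_block() macros are thin wrappers around the
 * walkers above, roughly
 *
 *    #define nir_foreach_block(block, impl)                            \
 *       for (nir_block *block = nir_start_block(impl); block != NULL;  \
 *            block = nir_block_cf_tree_next(block))
 *
 * with nir_foreach_block_safe() additionally caching the next block before
 * the loop body runs, so the body may remove the current block.
 */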

nir_if *
nir_block_get_following_if(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_if)
      return NULL;

   return nir_cf_node_as_if(next_node);
}

nir_loop *
nir_block_get_following_loop(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_loop)
      return NULL;

   return nir_cf_node_as_loop(next_node);
}
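
/* Illustrative sketch (hypothetical helper, not part of this file): a
 * peephole pass can use nir_block_get_following_if() to recognize the
 * "block followed by an if" shape it wants to flatten, e.g.:
 */
static bool
example_followed_by_empty_if(nir_block *block)
{
   nir_if *following_if = nir_block_get_following_if(block);
   if (following_if == NULL)
      return false;

   /* Are the first blocks of both sides free of instructions? */
   nir_block *then_block = nir_if_first_then_block(following_if);
   nir_block *else_block = nir_if_first_else_block(following_if);
   return exec_list_is_empty(&then_block->instr_list) &&
          exec_list_is_empty(&else_block->instr_list);
}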
|
2014-08-01 00:16:23 +01:00
|
|
|
|
|
|
|
void
|
|
|
|
nir_index_blocks(nir_function_impl *impl)
|
|
|
|
{
|
|
|
|
unsigned index = 0;
|
|
|
|
|
2014-10-29 19:42:54 +00:00
|
|
|
if (impl->valid_metadata & nir_metadata_block_index)
|
2014-08-01 00:16:23 +01:00
|
|
|
return;
|
|
|
|
|
2016-04-08 20:51:26 +01:00
|
|
|
nir_foreach_block(block, impl) {
|
|
|
|
block->index = index++;
|
|
|
|
}
|
2014-08-01 00:16:23 +01:00
|
|
|
|
|
|
|
impl->num_blocks = index;
|
|
|
|
}

static bool
index_ssa_def_cb(nir_ssa_def *def, void *state)
{
   unsigned *index = (unsigned *) state;
   def->index = (*index)++;

   return true;
}

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
void
nir_index_ssa_defs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         nir_foreach_ssa_def(instr, index_ssa_def_cb, &index);
   }

   impl->ssa_alloc = index;
}
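
/* Illustrative sketch (hypothetical helper, not part of this file): thanks
 * to the ordering guarantee above, a->index <= b->index is a cheap
 * necessary condition for a to dominate b, so passes can use it as an
 * early filter before a full dominance query.
 */
static inline bool
example_may_dominate(const nir_ssa_def *a, const nir_ssa_def *b)
{
   return a->index <= b->index;
}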

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
unsigned
nir_index_instrs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         instr->index = index++;
   }

   return index;
}
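
/* Illustrative sketch (hypothetical helper, not part of this file): the
 * return value of nir_index_instrs() sizes flat per-instruction arrays, so
 * a pass can replace hash-table lookups with direct indexing.
 */
static void
example_per_instr_flags(nir_function_impl *impl)
{
   unsigned num_instrs = nir_index_instrs(impl);
   uint8_t *flags = rzalloc_array(NULL, uint8_t, num_instrs);

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         flags[instr->index] = 1;   /* O(1) lookup keyed by index */
   }

   ralloc_free(flags);
}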

nir_intrinsic_op
nir_intrinsic_from_system_value(gl_system_value val)
{
   switch (val) {
   case SYSTEM_VALUE_VERTEX_ID:
      return nir_intrinsic_load_vertex_id;
   case SYSTEM_VALUE_INSTANCE_ID:
      return nir_intrinsic_load_instance_id;
   case SYSTEM_VALUE_DRAW_ID:
      return nir_intrinsic_load_draw_id;
   case SYSTEM_VALUE_BASE_INSTANCE:
      return nir_intrinsic_load_base_instance;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      return nir_intrinsic_load_vertex_id_zero_base;
   case SYSTEM_VALUE_IS_INDEXED_DRAW:
      return nir_intrinsic_load_is_indexed_draw;
   case SYSTEM_VALUE_FIRST_VERTEX:
      return nir_intrinsic_load_first_vertex;
   case SYSTEM_VALUE_BASE_VERTEX:
      return nir_intrinsic_load_base_vertex;
   case SYSTEM_VALUE_INVOCATION_ID:
      return nir_intrinsic_load_invocation_id;
   case SYSTEM_VALUE_FRAG_COORD:
      return nir_intrinsic_load_frag_coord;
   case SYSTEM_VALUE_FRONT_FACE:
      return nir_intrinsic_load_front_face;
   case SYSTEM_VALUE_SAMPLE_ID:
      return nir_intrinsic_load_sample_id;
   case SYSTEM_VALUE_SAMPLE_POS:
      return nir_intrinsic_load_sample_pos;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      return nir_intrinsic_load_sample_mask_in;
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      return nir_intrinsic_load_local_invocation_id;
   case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
      return nir_intrinsic_load_local_invocation_index;
   case SYSTEM_VALUE_WORK_GROUP_ID:
      return nir_intrinsic_load_work_group_id;
   case SYSTEM_VALUE_NUM_WORK_GROUPS:
      return nir_intrinsic_load_num_work_groups;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      return nir_intrinsic_load_primitive_id;
   case SYSTEM_VALUE_TESS_COORD:
      return nir_intrinsic_load_tess_coord;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      return nir_intrinsic_load_tess_level_outer;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      return nir_intrinsic_load_tess_level_inner;
   case SYSTEM_VALUE_VERTICES_IN:
      return nir_intrinsic_load_patch_vertices_in;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      return nir_intrinsic_load_helper_invocation;
   case SYSTEM_VALUE_VIEW_INDEX:
      return nir_intrinsic_load_view_index;
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      return nir_intrinsic_load_subgroup_size;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      return nir_intrinsic_load_subgroup_invocation;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      return nir_intrinsic_load_subgroup_eq_mask;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      return nir_intrinsic_load_subgroup_ge_mask;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      return nir_intrinsic_load_subgroup_gt_mask;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      return nir_intrinsic_load_subgroup_le_mask;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      return nir_intrinsic_load_subgroup_lt_mask;
   case SYSTEM_VALUE_NUM_SUBGROUPS:
      return nir_intrinsic_load_num_subgroups;
   case SYSTEM_VALUE_SUBGROUP_ID:
      return nir_intrinsic_load_subgroup_id;
   case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
      return nir_intrinsic_load_local_group_size;
   case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
      return nir_intrinsic_load_global_invocation_id;
   case SYSTEM_VALUE_WORK_DIM:
      return nir_intrinsic_load_work_dim;
   default:
      unreachable("system value does not directly correspond to intrinsic");
   }
}

gl_system_value
nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
{
   switch (intrin) {
   case nir_intrinsic_load_vertex_id:
      return SYSTEM_VALUE_VERTEX_ID;
   case nir_intrinsic_load_instance_id:
      return SYSTEM_VALUE_INSTANCE_ID;
   case nir_intrinsic_load_draw_id:
      return SYSTEM_VALUE_DRAW_ID;
   case nir_intrinsic_load_base_instance:
      return SYSTEM_VALUE_BASE_INSTANCE;
   case nir_intrinsic_load_vertex_id_zero_base:
      return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
   case nir_intrinsic_load_first_vertex:
      return SYSTEM_VALUE_FIRST_VERTEX;
   case nir_intrinsic_load_is_indexed_draw:
      return SYSTEM_VALUE_IS_INDEXED_DRAW;
   case nir_intrinsic_load_base_vertex:
      return SYSTEM_VALUE_BASE_VERTEX;
   case nir_intrinsic_load_invocation_id:
      return SYSTEM_VALUE_INVOCATION_ID;
   case nir_intrinsic_load_frag_coord:
      return SYSTEM_VALUE_FRAG_COORD;
   case nir_intrinsic_load_front_face:
      return SYSTEM_VALUE_FRONT_FACE;
   case nir_intrinsic_load_sample_id:
      return SYSTEM_VALUE_SAMPLE_ID;
   case nir_intrinsic_load_sample_pos:
      return SYSTEM_VALUE_SAMPLE_POS;
   case nir_intrinsic_load_sample_mask_in:
      return SYSTEM_VALUE_SAMPLE_MASK_IN;
   case nir_intrinsic_load_local_invocation_id:
      return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
   case nir_intrinsic_load_local_invocation_index:
      return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
   case nir_intrinsic_load_num_work_groups:
      return SYSTEM_VALUE_NUM_WORK_GROUPS;
   case nir_intrinsic_load_work_group_id:
      return SYSTEM_VALUE_WORK_GROUP_ID;
   case nir_intrinsic_load_primitive_id:
      return SYSTEM_VALUE_PRIMITIVE_ID;
   case nir_intrinsic_load_tess_coord:
      return SYSTEM_VALUE_TESS_COORD;
   case nir_intrinsic_load_tess_level_outer:
      return SYSTEM_VALUE_TESS_LEVEL_OUTER;
   case nir_intrinsic_load_tess_level_inner:
      return SYSTEM_VALUE_TESS_LEVEL_INNER;
   case nir_intrinsic_load_patch_vertices_in:
      return SYSTEM_VALUE_VERTICES_IN;
   case nir_intrinsic_load_helper_invocation:
      return SYSTEM_VALUE_HELPER_INVOCATION;
   case nir_intrinsic_load_view_index:
      return SYSTEM_VALUE_VIEW_INDEX;
   case nir_intrinsic_load_subgroup_size:
      return SYSTEM_VALUE_SUBGROUP_SIZE;
   case nir_intrinsic_load_subgroup_invocation:
      return SYSTEM_VALUE_SUBGROUP_INVOCATION;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SYSTEM_VALUE_SUBGROUP_GE_MASK;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SYSTEM_VALUE_SUBGROUP_GT_MASK;
   case nir_intrinsic_load_subgroup_le_mask:
      return SYSTEM_VALUE_SUBGROUP_LE_MASK;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SYSTEM_VALUE_SUBGROUP_LT_MASK;
   case nir_intrinsic_load_num_subgroups:
      return SYSTEM_VALUE_NUM_SUBGROUPS;
   case nir_intrinsic_load_subgroup_id:
      return SYSTEM_VALUE_SUBGROUP_ID;
   case nir_intrinsic_load_local_group_size:
      return SYSTEM_VALUE_LOCAL_GROUP_SIZE;
   case nir_intrinsic_load_global_invocation_id:
      return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
   default:
      unreachable("intrinsic doesn't produce a system value");
   }
}
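
/* Illustrative sketch (hypothetical helper, not part of this file): for the
 * values covered by both switches above, the two mappings are inverses,
 * which makes a handy debug-build sanity check:
 */
static void
example_check_round_trip(gl_system_value val)
{
   nir_intrinsic_op op = nir_intrinsic_from_system_value(val);
   assert(nir_system_value_from_intrinsic(op) == val);
}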