nir: Switch from ralloc to malloc for NIR instructions.
By replacing the 48-byte ralloc header with our exec_node gc_node (16 bytes), runtime of shader-db on my system across this series drops -4.21738% +/- 1.47757% (n=5). Inspired by discussion on #5034. Reviewed-by: Matt Turner <mattst88@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11776>
This commit is contained in:
parent
feee5e6974
commit
879a569884
|
@ -100,6 +100,17 @@ nir_component_mask_reinterpret(nir_component_mask_t mask,
|
|||
return new_mask;
|
||||
}
|
||||
|
||||
static void
|
||||
nir_shader_destructor(void *ptr)
|
||||
{
|
||||
nir_shader *shader = ptr;
|
||||
|
||||
/* Free all instrs from the shader, since they're not ralloced. */
|
||||
list_for_each_entry_safe(nir_instr, instr, &shader->gc_list, gc_node) {
|
||||
nir_instr_free(instr);
|
||||
}
|
||||
}
|
||||
|
||||
nir_shader *
|
||||
nir_shader_create(void *mem_ctx,
|
||||
gl_shader_stage stage,
|
||||
|
@ -107,6 +118,7 @@ nir_shader_create(void *mem_ctx,
|
|||
shader_info *si)
|
||||
{
|
||||
nir_shader *shader = rzalloc(mem_ctx, nir_shader);
|
||||
ralloc_set_destructor(shader, nir_shader_destructor);
|
||||
|
||||
exec_list_make_empty(&shader->variables);
|
||||
|
||||
|
@ -339,7 +351,7 @@ void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
|
|||
dest->reg.base_offset = src->reg.base_offset;
|
||||
dest->reg.reg = src->reg.reg;
|
||||
if (src->reg.indirect) {
|
||||
dest->reg.indirect = ralloc(mem_ctx, nir_src);
|
||||
dest->reg.indirect = malloc(sizeof(nir_src));
|
||||
nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
|
||||
} else {
|
||||
dest->reg.indirect = NULL;
|
||||
|
@ -357,7 +369,7 @@ void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
|
|||
dest->reg.base_offset = src->reg.base_offset;
|
||||
dest->reg.reg = src->reg.reg;
|
||||
if (src->reg.indirect) {
|
||||
dest->reg.indirect = ralloc(instr, nir_src);
|
||||
dest->reg.indirect = malloc(sizeof(nir_src));
|
||||
nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
|
||||
} else {
|
||||
dest->reg.indirect = NULL;
|
||||
|
@ -567,10 +579,8 @@ nir_alu_instr *
|
|||
nir_alu_instr_create(nir_shader *shader, nir_op op)
|
||||
{
|
||||
unsigned num_srcs = nir_op_infos[op].num_inputs;
|
||||
/* TODO: don't use rzalloc */
|
||||
nir_alu_instr *instr =
|
||||
rzalloc_size(shader,
|
||||
sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
|
||||
/* TODO: don't use calloc */
|
||||
nir_alu_instr *instr = calloc(1, sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
|
||||
|
||||
instr_init(&instr->instr, nir_instr_type_alu);
|
||||
instr->op = op;
|
||||
|
@ -586,8 +596,7 @@ nir_alu_instr_create(nir_shader *shader, nir_op op)
|
|||
nir_deref_instr *
|
||||
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
|
||||
{
|
||||
nir_deref_instr *instr =
|
||||
rzalloc_size(shader, sizeof(nir_deref_instr));
|
||||
nir_deref_instr *instr = calloc(1, sizeof(*instr));
|
||||
|
||||
instr_init(&instr->instr, nir_instr_type_deref);
|
||||
|
||||
|
@ -609,7 +618,7 @@ nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
|
|||
nir_jump_instr *
|
||||
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
|
||||
{
|
||||
nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
|
||||
nir_jump_instr *instr = malloc(sizeof(*instr));
|
||||
instr_init(&instr->instr, nir_instr_type_jump);
|
||||
src_init(&instr->condition);
|
||||
instr->type = type;
|
||||
|
@ -626,7 +635,7 @@ nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
|
|||
unsigned bit_size)
|
||||
{
|
||||
nir_load_const_instr *instr =
|
||||
rzalloc_size(shader, sizeof(*instr) + num_components * sizeof(*instr->value));
|
||||
calloc(1, sizeof(*instr) + num_components * sizeof(*instr->value));
|
||||
instr_init(&instr->instr, nir_instr_type_load_const);
|
||||
|
||||
nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);
|
||||
|
@ -640,10 +649,9 @@ nir_intrinsic_instr *
|
|||
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
|
||||
{
|
||||
unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
|
||||
/* TODO: don't use rzalloc */
|
||||
/* TODO: don't use calloc */
|
||||
nir_intrinsic_instr *instr =
|
||||
rzalloc_size(shader,
|
||||
sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
|
||||
calloc(1, sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
|
||||
|
||||
instr_init(&instr->instr, nir_instr_type_intrinsic);
|
||||
instr->intrinsic = op;
|
||||
|
@ -664,8 +672,7 @@ nir_call_instr_create(nir_shader *shader, nir_function *callee)
|
|||
{
|
||||
const unsigned num_params = callee->num_params;
|
||||
nir_call_instr *instr =
|
||||
rzalloc_size(shader, sizeof(*instr) +
|
||||
num_params * sizeof(instr->params[0]));
|
||||
calloc(1, sizeof(*instr) + num_params * sizeof(instr->params[0]));
|
||||
|
||||
instr_init(&instr->instr, nir_instr_type_call);
|
||||
instr->callee = callee;
|
||||
|
@ -689,13 +696,13 @@ static int8_t default_tg4_offsets[4][2] =
|
|||
nir_tex_instr *
|
||||
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
|
||||
{
|
||||
nir_tex_instr *instr = rzalloc(shader, nir_tex_instr);
|
||||
nir_tex_instr *instr = calloc(1, sizeof(*instr));
|
||||
instr_init(&instr->instr, nir_instr_type_tex);
|
||||
|
||||
dest_init(&instr->dest);
|
||||
|
||||
instr->num_srcs = num_srcs;
|
||||
instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
|
||||
instr->src = malloc(sizeof(nir_tex_src) * num_srcs);
|
||||
for (unsigned i = 0; i < num_srcs; i++)
|
||||
src_init(&instr->src[i].src);
|
||||
|
||||
|
@ -713,7 +720,7 @@ nir_tex_instr_add_src(nir_tex_instr *tex,
|
|||
nir_tex_src_type src_type,
|
||||
nir_src src)
|
||||
{
|
||||
nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
|
||||
nir_tex_src *new_srcs = calloc(sizeof(*new_srcs),
|
||||
tex->num_srcs + 1);
|
||||
|
||||
for (unsigned i = 0; i < tex->num_srcs; i++) {
|
||||
|
@ -722,7 +729,7 @@ nir_tex_instr_add_src(nir_tex_instr *tex,
|
|||
&tex->src[i].src);
|
||||
}
|
||||
|
||||
ralloc_free(tex->src);
|
||||
free(tex->src);
|
||||
tex->src = new_srcs;
|
||||
|
||||
tex->src[tex->num_srcs].src_type = src_type;
|
||||
|
@ -758,7 +765,7 @@ nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
|
|||
nir_phi_instr *
|
||||
nir_phi_instr_create(nir_shader *shader)
|
||||
{
|
||||
nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
|
||||
nir_phi_instr *instr = malloc(sizeof(*instr));
|
||||
instr_init(&instr->instr, nir_instr_type_phi);
|
||||
|
||||
dest_init(&instr->dest);
|
||||
|
@ -782,7 +789,7 @@ nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src)
|
|||
{
|
||||
nir_phi_src *phi_src;
|
||||
|
||||
phi_src = rzalloc(instr, nir_phi_src);
|
||||
phi_src = calloc(1, sizeof(nir_phi_src));
|
||||
phi_src->pred = pred;
|
||||
phi_src->src = src;
|
||||
phi_src->src.parent_instr = &instr->instr;
|
||||
|
@ -794,7 +801,7 @@ nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src)
|
|||
nir_parallel_copy_instr *
|
||||
nir_parallel_copy_instr_create(nir_shader *shader)
|
||||
{
|
||||
nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
|
||||
nir_parallel_copy_instr *instr = malloc(sizeof(*instr));
|
||||
instr_init(&instr->instr, nir_instr_type_parallel_copy);
|
||||
|
||||
exec_list_make_empty(&instr->entries);
|
||||
|
@ -809,7 +816,7 @@ nir_ssa_undef_instr_create(nir_shader *shader,
|
|||
unsigned num_components,
|
||||
unsigned bit_size)
|
||||
{
|
||||
nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
|
||||
nir_ssa_undef_instr *instr = malloc(sizeof(*instr));
|
||||
instr_init(&instr->instr, nir_instr_type_ssa_undef);
|
||||
|
||||
nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);
|
||||
|
@ -1115,10 +1122,50 @@ void nir_instr_remove_v(nir_instr *instr)
|
|||
}
|
||||
}
|
||||
|
||||
static bool nir_instr_free_src_indirects(nir_src *src, void *state)
|
||||
{
|
||||
if (!src->is_ssa && src->reg.indirect) {
|
||||
assert(src->reg.indirect->is_ssa || !src->reg.indirect->reg.indirect);
|
||||
free(src->reg.indirect);
|
||||
src->reg.indirect = NULL;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool nir_instr_free_dest_indirects(nir_dest *dest, void *state)
|
||||
{
|
||||
if (!dest->is_ssa && dest->reg.indirect) {
|
||||
assert(dest->reg.indirect->is_ssa || !dest->reg.indirect->reg.indirect);
|
||||
free(dest->reg.indirect);
|
||||
dest->reg.indirect = NULL;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void nir_instr_free(nir_instr *instr)
|
||||
{
|
||||
nir_foreach_src(instr, nir_instr_free_src_indirects, NULL);
|
||||
nir_foreach_dest(instr, nir_instr_free_dest_indirects, NULL);
|
||||
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_tex:
|
||||
free(nir_instr_as_tex(instr)->src);
|
||||
break;
|
||||
|
||||
case nir_instr_type_phi: {
|
||||
nir_phi_instr *phi = nir_instr_as_phi(instr);
|
||||
nir_foreach_phi_src_safe(phi_src, phi) {
|
||||
free(phi_src);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
list_del(&instr->gc_node);
|
||||
ralloc_free(instr);
|
||||
free(instr);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -243,7 +243,7 @@ __clone_src(clone_state *state, void *ninstr_or_if,
|
|||
} else {
|
||||
nsrc->reg.reg = remap_reg(state, src->reg.reg);
|
||||
if (src->reg.indirect) {
|
||||
nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src);
|
||||
nsrc->reg.indirect = malloc(sizeof(nir_src));
|
||||
__clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect);
|
||||
}
|
||||
nsrc->reg.base_offset = src->reg.base_offset;
|
||||
|
@ -263,7 +263,7 @@ __clone_dst(clone_state *state, nir_instr *ninstr,
|
|||
} else {
|
||||
ndst->reg.reg = remap_reg(state, dst->reg.reg);
|
||||
if (dst->reg.indirect) {
|
||||
ndst->reg.indirect = ralloc(ninstr, nir_src);
|
||||
ndst->reg.indirect = malloc(sizeof(nir_src));
|
||||
__clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect);
|
||||
}
|
||||
ndst->reg.base_offset = dst->reg.base_offset;
|
||||
|
@ -790,6 +790,10 @@ nir_shader_replace(nir_shader *dst, nir_shader *src)
|
|||
ralloc_adopt(dead_ctx, dst);
|
||||
ralloc_free(dead_ctx);
|
||||
|
||||
list_for_each_entry_safe(nir_instr, instr, &dst->gc_list, gc_node) {
|
||||
nir_instr_free(instr);
|
||||
}
|
||||
|
||||
/* Re-parent all of src's ralloc children to dst */
|
||||
ralloc_adopt(dst, src);
|
||||
|
||||
|
|
|
@ -440,6 +440,7 @@ remove_phi_src(nir_block *block, nir_block *pred)
|
|||
if (src->pred == pred) {
|
||||
list_del(&src->src.use_link);
|
||||
exec_node_remove(&src->node);
|
||||
free(src);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -58,7 +58,7 @@ lower_cube_size(nir_builder *b, nir_intrinsic_instr *intrin)
|
|||
nir_ssa_def *vec = nir_vec(b, comps, intrin->dest.ssa.num_components);
|
||||
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, vec);
|
||||
nir_instr_remove(&intrin->instr);
|
||||
ralloc_free(&intrin->instr);
|
||||
nir_instr_free(&intrin->instr);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
|
@ -159,7 +159,7 @@ get_deref_reg_src(nir_deref_instr *deref, struct locals_to_regs_state *state)
|
|||
if (src.reg.indirect) {
|
||||
assert(src.reg.base_offset == 0);
|
||||
} else {
|
||||
src.reg.indirect = ralloc(b->shader, nir_src);
|
||||
src.reg.indirect = malloc(sizeof(nir_src));
|
||||
*src.reg.indirect =
|
||||
nir_src_for_ssa(nir_imm_int(b, src.reg.base_offset));
|
||||
src.reg.base_offset = 0;
|
||||
|
|
|
@ -550,7 +550,7 @@ read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
|
|||
src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
|
||||
src->reg.base_offset = blob_read_uint32(ctx->blob);
|
||||
if (header.any.is_indirect) {
|
||||
src->reg.indirect = ralloc(mem_ctx, nir_src);
|
||||
src->reg.indirect = malloc(sizeof(nir_src));
|
||||
read_src(ctx, src->reg.indirect, mem_ctx);
|
||||
} else {
|
||||
src->reg.indirect = NULL;
|
||||
|
@ -770,7 +770,7 @@ read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
|
|||
dst->reg.reg = read_object(ctx);
|
||||
dst->reg.base_offset = blob_read_uint32(ctx->blob);
|
||||
if (dest.reg.is_indirect) {
|
||||
dst->reg.indirect = ralloc(instr, nir_src);
|
||||
dst->reg.indirect = malloc(sizeof(nir_src));
|
||||
read_src(ctx, dst->reg.indirect, instr);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -76,8 +76,6 @@ sweep_block(nir_shader *nir, nir_block *block)
|
|||
list_del(&instr->gc_node);
|
||||
list_add(&instr->gc_node, &nir->gc_list);
|
||||
|
||||
ralloc_steal(nir, instr);
|
||||
|
||||
nir_foreach_src(instr, sweep_src_indirect, nir);
|
||||
nir_foreach_dest(instr, sweep_dest_indirect, nir);
|
||||
}
|
||||
|
@ -179,11 +177,7 @@ nir_sweep(nir_shader *nir)
|
|||
sweep_function(nir, func);
|
||||
}
|
||||
|
||||
/* Manually GCed instrs now before ralloc_free()ing the other rubbish, to
|
||||
* ensure that the shader's GC list was maintained without ralloc_freeing any
|
||||
* instrs behind our back. Note that the instr free routine will remove it
|
||||
* from the list.
|
||||
*/
|
||||
/* Sweep instrs not found while walking the shader. */
|
||||
list_for_each_entry_safe(nir_instr, instr, &instr_gc_list, gc_node) {
|
||||
nir_instr_free(instr);
|
||||
}
|
||||
|
|
|
@ -823,7 +823,7 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
|
|||
dest.saturate = false;
|
||||
|
||||
if (tgsi_dst->Indirect && (tgsi_dst->File != TGSI_FILE_TEMPORARY)) {
|
||||
nir_src *indirect = ralloc(c->build.shader, nir_src);
|
||||
nir_src *indirect = malloc(sizeof(nir_src));
|
||||
*indirect = nir_src_for_ssa(ttn_src_for_indirect(c, &tgsi_fdst->Indirect));
|
||||
dest.dest.reg.indirect = indirect;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue