nir: Switch from ralloc to malloc for NIR instructions.

Replacing the 48-byte ralloc header with our exec_node gc_node (16
bytes) reduces the runtime of shader-db on my system across this series
by 4.21738% +/- 1.47757% (n=5).

Inspired by discussion on #5034.

Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11776>
This commit is contained in:
Emma Anholt 2021-07-07 10:30:05 -07:00 committed by Marge Bot
parent feee5e6974
commit 879a569884
8 changed files with 84 additions and 38 deletions

View File

@ -100,6 +100,17 @@ nir_component_mask_reinterpret(nir_component_mask_t mask,
return new_mask; return new_mask;
} }
static void
nir_shader_destructor(void *ptr)
{
nir_shader *shader = ptr;
/* Free all instrs from the shader, since they're not ralloced. */
list_for_each_entry_safe(nir_instr, instr, &shader->gc_list, gc_node) {
nir_instr_free(instr);
}
}
nir_shader * nir_shader *
nir_shader_create(void *mem_ctx, nir_shader_create(void *mem_ctx,
gl_shader_stage stage, gl_shader_stage stage,
@ -107,6 +118,7 @@ nir_shader_create(void *mem_ctx,
shader_info *si) shader_info *si)
{ {
nir_shader *shader = rzalloc(mem_ctx, nir_shader); nir_shader *shader = rzalloc(mem_ctx, nir_shader);
ralloc_set_destructor(shader, nir_shader_destructor);
exec_list_make_empty(&shader->variables); exec_list_make_empty(&shader->variables);
@ -339,7 +351,7 @@ void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
dest->reg.base_offset = src->reg.base_offset; dest->reg.base_offset = src->reg.base_offset;
dest->reg.reg = src->reg.reg; dest->reg.reg = src->reg.reg;
if (src->reg.indirect) { if (src->reg.indirect) {
dest->reg.indirect = ralloc(mem_ctx, nir_src); dest->reg.indirect = malloc(sizeof(nir_src));
nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx); nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
} else { } else {
dest->reg.indirect = NULL; dest->reg.indirect = NULL;
@ -357,7 +369,7 @@ void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
dest->reg.base_offset = src->reg.base_offset; dest->reg.base_offset = src->reg.base_offset;
dest->reg.reg = src->reg.reg; dest->reg.reg = src->reg.reg;
if (src->reg.indirect) { if (src->reg.indirect) {
dest->reg.indirect = ralloc(instr, nir_src); dest->reg.indirect = malloc(sizeof(nir_src));
nir_src_copy(dest->reg.indirect, src->reg.indirect, instr); nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
} else { } else {
dest->reg.indirect = NULL; dest->reg.indirect = NULL;
@ -567,10 +579,8 @@ nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op) nir_alu_instr_create(nir_shader *shader, nir_op op)
{ {
unsigned num_srcs = nir_op_infos[op].num_inputs; unsigned num_srcs = nir_op_infos[op].num_inputs;
/* TODO: don't use rzalloc */ /* TODO: don't use calloc */
nir_alu_instr *instr = nir_alu_instr *instr = calloc(1, sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
rzalloc_size(shader,
sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
instr_init(&instr->instr, nir_instr_type_alu); instr_init(&instr->instr, nir_instr_type_alu);
instr->op = op; instr->op = op;
@ -586,8 +596,7 @@ nir_alu_instr_create(nir_shader *shader, nir_op op)
nir_deref_instr * nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type) nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
{ {
nir_deref_instr *instr = nir_deref_instr *instr = calloc(1, sizeof(*instr));
rzalloc_size(shader, sizeof(nir_deref_instr));
instr_init(&instr->instr, nir_instr_type_deref); instr_init(&instr->instr, nir_instr_type_deref);
@ -609,7 +618,7 @@ nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
nir_jump_instr * nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type) nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{ {
nir_jump_instr *instr = ralloc(shader, nir_jump_instr); nir_jump_instr *instr = malloc(sizeof(*instr));
instr_init(&instr->instr, nir_instr_type_jump); instr_init(&instr->instr, nir_instr_type_jump);
src_init(&instr->condition); src_init(&instr->condition);
instr->type = type; instr->type = type;
@ -626,7 +635,7 @@ nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
unsigned bit_size) unsigned bit_size)
{ {
nir_load_const_instr *instr = nir_load_const_instr *instr =
rzalloc_size(shader, sizeof(*instr) + num_components * sizeof(*instr->value)); calloc(1, sizeof(*instr) + num_components * sizeof(*instr->value));
instr_init(&instr->instr, nir_instr_type_load_const); instr_init(&instr->instr, nir_instr_type_load_const);
nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size); nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);
@ -640,10 +649,9 @@ nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op) nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{ {
unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
/* TODO: don't use rzalloc */ /* TODO: don't use calloc */
nir_intrinsic_instr *instr = nir_intrinsic_instr *instr =
rzalloc_size(shader, calloc(1, sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
instr_init(&instr->instr, nir_instr_type_intrinsic); instr_init(&instr->instr, nir_instr_type_intrinsic);
instr->intrinsic = op; instr->intrinsic = op;
@ -664,8 +672,7 @@ nir_call_instr_create(nir_shader *shader, nir_function *callee)
{ {
const unsigned num_params = callee->num_params; const unsigned num_params = callee->num_params;
nir_call_instr *instr = nir_call_instr *instr =
rzalloc_size(shader, sizeof(*instr) + calloc(1, sizeof(*instr) + num_params * sizeof(instr->params[0]));
num_params * sizeof(instr->params[0]));
instr_init(&instr->instr, nir_instr_type_call); instr_init(&instr->instr, nir_instr_type_call);
instr->callee = callee; instr->callee = callee;
@ -689,13 +696,13 @@ static int8_t default_tg4_offsets[4][2] =
nir_tex_instr * nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs) nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{ {
nir_tex_instr *instr = rzalloc(shader, nir_tex_instr); nir_tex_instr *instr = calloc(1, sizeof(*instr));
instr_init(&instr->instr, nir_instr_type_tex); instr_init(&instr->instr, nir_instr_type_tex);
dest_init(&instr->dest); dest_init(&instr->dest);
instr->num_srcs = num_srcs; instr->num_srcs = num_srcs;
instr->src = ralloc_array(instr, nir_tex_src, num_srcs); instr->src = malloc(sizeof(nir_tex_src) * num_srcs);
for (unsigned i = 0; i < num_srcs; i++) for (unsigned i = 0; i < num_srcs; i++)
src_init(&instr->src[i].src); src_init(&instr->src[i].src);
@ -713,7 +720,7 @@ nir_tex_instr_add_src(nir_tex_instr *tex,
nir_tex_src_type src_type, nir_tex_src_type src_type,
nir_src src) nir_src src)
{ {
nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src, nir_tex_src *new_srcs = calloc(sizeof(*new_srcs),
tex->num_srcs + 1); tex->num_srcs + 1);
for (unsigned i = 0; i < tex->num_srcs; i++) { for (unsigned i = 0; i < tex->num_srcs; i++) {
@ -722,7 +729,7 @@ nir_tex_instr_add_src(nir_tex_instr *tex,
&tex->src[i].src); &tex->src[i].src);
} }
ralloc_free(tex->src); free(tex->src);
tex->src = new_srcs; tex->src = new_srcs;
tex->src[tex->num_srcs].src_type = src_type; tex->src[tex->num_srcs].src_type = src_type;
@ -758,7 +765,7 @@ nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
nir_phi_instr * nir_phi_instr *
nir_phi_instr_create(nir_shader *shader) nir_phi_instr_create(nir_shader *shader)
{ {
nir_phi_instr *instr = ralloc(shader, nir_phi_instr); nir_phi_instr *instr = malloc(sizeof(*instr));
instr_init(&instr->instr, nir_instr_type_phi); instr_init(&instr->instr, nir_instr_type_phi);
dest_init(&instr->dest); dest_init(&instr->dest);
@ -782,7 +789,7 @@ nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src)
{ {
nir_phi_src *phi_src; nir_phi_src *phi_src;
phi_src = rzalloc(instr, nir_phi_src); phi_src = calloc(1, sizeof(nir_phi_src));
phi_src->pred = pred; phi_src->pred = pred;
phi_src->src = src; phi_src->src = src;
phi_src->src.parent_instr = &instr->instr; phi_src->src.parent_instr = &instr->instr;
@ -794,7 +801,7 @@ nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src)
nir_parallel_copy_instr * nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader) nir_parallel_copy_instr_create(nir_shader *shader)
{ {
nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr); nir_parallel_copy_instr *instr = malloc(sizeof(*instr));
instr_init(&instr->instr, nir_instr_type_parallel_copy); instr_init(&instr->instr, nir_instr_type_parallel_copy);
exec_list_make_empty(&instr->entries); exec_list_make_empty(&instr->entries);
@ -809,7 +816,7 @@ nir_ssa_undef_instr_create(nir_shader *shader,
unsigned num_components, unsigned num_components,
unsigned bit_size) unsigned bit_size)
{ {
nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr); nir_ssa_undef_instr *instr = malloc(sizeof(*instr));
instr_init(&instr->instr, nir_instr_type_ssa_undef); instr_init(&instr->instr, nir_instr_type_ssa_undef);
nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size); nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);
@ -1115,10 +1122,50 @@ void nir_instr_remove_v(nir_instr *instr)
} }
} }
/**
 * nir_foreach_src() callback used by nir_instr_free(): releases the
 * heap-allocated indirect nir_src attached to a register source, if any.
 *
 * \param src    source to inspect; its reg.indirect is freed and cleared.
 * \param state  unused.
 * \return always true.
 */
static bool
nir_instr_free_src_indirects(nir_src *src, void *state)
{
   /* SSA sources, and register sources without an indirect, own nothing. */
   if (src->is_ssa || !src->reg.indirect)
      return true;

   nir_src *indirect = src->reg.indirect;

   /* Only one level is freed here, so the indirect must not itself carry a
    * further register indirect.
    */
   assert(indirect->is_ssa || !indirect->reg.indirect);

   free(indirect);
   src->reg.indirect = NULL;

   return true;
}
/**
 * nir_foreach_dest() callback used by nir_instr_free(): releases the
 * heap-allocated indirect nir_src attached to a register destination, if any.
 *
 * \param dest   destination to inspect; its reg.indirect is freed and cleared.
 * \param state  unused.
 * \return always true.
 */
static bool
nir_instr_free_dest_indirects(nir_dest *dest, void *state)
{
   /* SSA destinations, and register destinations without an indirect, own
    * nothing.
    */
   if (dest->is_ssa || !dest->reg.indirect)
      return true;

   nir_src *indirect = dest->reg.indirect;

   /* Only one level is freed here, so the indirect must not itself carry a
    * further register indirect.
    */
   assert(indirect->is_ssa || !indirect->reg.indirect);

   free(indirect);
   dest->reg.indirect = NULL;

   return true;
}
/*
 * Frees an instruction together with the side allocations it owns: the
 * register-indirect nir_src structs on its sources and destinations, the
 * source array of a tex instruction, and the per-source structs of a phi.
 * The instruction is unlinked from the GC list before being released.
 */
void nir_instr_free(nir_instr *instr) void nir_instr_free(nir_instr *instr)
{ {
/* Release any reg.indirect allocations hanging off the srcs and dests. */
nir_foreach_src(instr, nir_instr_free_src_indirects, NULL);
nir_foreach_dest(instr, nir_instr_free_dest_indirects, NULL);
switch (instr->type) {
case nir_instr_type_tex:
/* The tex source array is allocated separately from the instruction. */
free(nir_instr_as_tex(instr)->src);
break;
case nir_instr_type_phi: {
nir_phi_instr *phi = nir_instr_as_phi(instr);
/* Each phi source is its own allocation; the _safe iterator is presumably
 * needed because the current entry is freed inside the loop.
 */
nir_foreach_phi_src_safe(phi_src, phi) {
free(phi_src);
}
break;
}
default:
/* Other instruction types have no extra allocations handled here. */
break;
}
/* Unlink from the GC list before releasing the instruction itself. */
list_del(&instr->gc_node); list_del(&instr->gc_node);
ralloc_free(instr); free(instr);
} }
void void

View File

@ -243,7 +243,7 @@ __clone_src(clone_state *state, void *ninstr_or_if,
} else { } else {
nsrc->reg.reg = remap_reg(state, src->reg.reg); nsrc->reg.reg = remap_reg(state, src->reg.reg);
if (src->reg.indirect) { if (src->reg.indirect) {
nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src); nsrc->reg.indirect = malloc(sizeof(nir_src));
__clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect); __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect);
} }
nsrc->reg.base_offset = src->reg.base_offset; nsrc->reg.base_offset = src->reg.base_offset;
@ -263,7 +263,7 @@ __clone_dst(clone_state *state, nir_instr *ninstr,
} else { } else {
ndst->reg.reg = remap_reg(state, dst->reg.reg); ndst->reg.reg = remap_reg(state, dst->reg.reg);
if (dst->reg.indirect) { if (dst->reg.indirect) {
ndst->reg.indirect = ralloc(ninstr, nir_src); ndst->reg.indirect = malloc(sizeof(nir_src));
__clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect); __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect);
} }
ndst->reg.base_offset = dst->reg.base_offset; ndst->reg.base_offset = dst->reg.base_offset;
@ -790,6 +790,10 @@ nir_shader_replace(nir_shader *dst, nir_shader *src)
ralloc_adopt(dead_ctx, dst); ralloc_adopt(dead_ctx, dst);
ralloc_free(dead_ctx); ralloc_free(dead_ctx);
list_for_each_entry_safe(nir_instr, instr, &dst->gc_list, gc_node) {
nir_instr_free(instr);
}
/* Re-parent all of src's ralloc children to dst */ /* Re-parent all of src's ralloc children to dst */
ralloc_adopt(dst, src); ralloc_adopt(dst, src);

View File

@ -440,6 +440,7 @@ remove_phi_src(nir_block *block, nir_block *pred)
if (src->pred == pred) { if (src->pred == pred) {
list_del(&src->src.use_link); list_del(&src->src.use_link);
exec_node_remove(&src->node); exec_node_remove(&src->node);
free(src);
} }
} }
} }

View File

@ -58,7 +58,7 @@ lower_cube_size(nir_builder *b, nir_intrinsic_instr *intrin)
nir_ssa_def *vec = nir_vec(b, comps, intrin->dest.ssa.num_components); nir_ssa_def *vec = nir_vec(b, comps, intrin->dest.ssa.num_components);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, vec); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, vec);
nir_instr_remove(&intrin->instr); nir_instr_remove(&intrin->instr);
ralloc_free(&intrin->instr); nir_instr_free(&intrin->instr);
} }
static bool static bool

View File

@ -159,7 +159,7 @@ get_deref_reg_src(nir_deref_instr *deref, struct locals_to_regs_state *state)
if (src.reg.indirect) { if (src.reg.indirect) {
assert(src.reg.base_offset == 0); assert(src.reg.base_offset == 0);
} else { } else {
src.reg.indirect = ralloc(b->shader, nir_src); src.reg.indirect = malloc(sizeof(nir_src));
*src.reg.indirect = *src.reg.indirect =
nir_src_for_ssa(nir_imm_int(b, src.reg.base_offset)); nir_src_for_ssa(nir_imm_int(b, src.reg.base_offset));
src.reg.base_offset = 0; src.reg.base_offset = 0;

View File

@ -550,7 +550,7 @@ read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
src->reg.reg = read_lookup_object(ctx, header.any.object_idx); src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
src->reg.base_offset = blob_read_uint32(ctx->blob); src->reg.base_offset = blob_read_uint32(ctx->blob);
if (header.any.is_indirect) { if (header.any.is_indirect) {
src->reg.indirect = ralloc(mem_ctx, nir_src); src->reg.indirect = malloc(sizeof(nir_src));
read_src(ctx, src->reg.indirect, mem_ctx); read_src(ctx, src->reg.indirect, mem_ctx);
} else { } else {
src->reg.indirect = NULL; src->reg.indirect = NULL;
@ -770,7 +770,7 @@ read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
dst->reg.reg = read_object(ctx); dst->reg.reg = read_object(ctx);
dst->reg.base_offset = blob_read_uint32(ctx->blob); dst->reg.base_offset = blob_read_uint32(ctx->blob);
if (dest.reg.is_indirect) { if (dest.reg.is_indirect) {
dst->reg.indirect = ralloc(instr, nir_src); dst->reg.indirect = malloc(sizeof(nir_src));
read_src(ctx, dst->reg.indirect, instr); read_src(ctx, dst->reg.indirect, instr);
} }
} }

View File

@ -76,8 +76,6 @@ sweep_block(nir_shader *nir, nir_block *block)
list_del(&instr->gc_node); list_del(&instr->gc_node);
list_add(&instr->gc_node, &nir->gc_list); list_add(&instr->gc_node, &nir->gc_list);
ralloc_steal(nir, instr);
nir_foreach_src(instr, sweep_src_indirect, nir); nir_foreach_src(instr, sweep_src_indirect, nir);
nir_foreach_dest(instr, sweep_dest_indirect, nir); nir_foreach_dest(instr, sweep_dest_indirect, nir);
} }
@ -179,11 +177,7 @@ nir_sweep(nir_shader *nir)
sweep_function(nir, func); sweep_function(nir, func);
} }
/* Manually GCed instrs now before ralloc_free()ing the other rubbish, to /* Sweep instrs not found while walking the shader. */
* ensure that the shader's GC list was maintained without ralloc_freeing any
* instrs behind our back. Note that the instr free routine will remove it
* from the list.
*/
list_for_each_entry_safe(nir_instr, instr, &instr_gc_list, gc_node) { list_for_each_entry_safe(nir_instr, instr, &instr_gc_list, gc_node) {
nir_instr_free(instr); nir_instr_free(instr);
} }

View File

@ -823,7 +823,7 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
dest.saturate = false; dest.saturate = false;
if (tgsi_dst->Indirect && (tgsi_dst->File != TGSI_FILE_TEMPORARY)) { if (tgsi_dst->Indirect && (tgsi_dst->File != TGSI_FILE_TEMPORARY)) {
nir_src *indirect = ralloc(c->build.shader, nir_src); nir_src *indirect = malloc(sizeof(nir_src));
*indirect = nir_src_for_ssa(ttn_src_for_indirect(c, &tgsi_fdst->Indirect)); *indirect = nir_src_for_ssa(ttn_src_for_indirect(c, &tgsi_fdst->Indirect));
dest.dest.reg.indirect = indirect; dest.dest.reg.indirect = indirect;
} }