nir: Switch from ralloc to malloc for NIR instructions.

By replacing the 48-byte ralloc header with our exec_node gc_node (16
bytes), runtime of shader-db on my system across this series drops
-4.21738% +/- 1.47757% (n=5).

Inspired by discussion on #5034.

Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11776>
This commit is contained in:
Emma Anholt 2021-07-07 10:30:05 -07:00 committed by Marge Bot
parent feee5e6974
commit 879a569884
8 changed files with 84 additions and 38 deletions

View File

@ -100,6 +100,17 @@ nir_component_mask_reinterpret(nir_component_mask_t mask,
return new_mask;
}
static void
nir_shader_destructor(void *ptr)
{
nir_shader *shader = ptr;
/* Free all instrs from the shader, since they're not ralloced. */
list_for_each_entry_safe(nir_instr, instr, &shader->gc_list, gc_node) {
nir_instr_free(instr);
}
}
nir_shader *
nir_shader_create(void *mem_ctx,
gl_shader_stage stage,
@ -107,6 +118,7 @@ nir_shader_create(void *mem_ctx,
shader_info *si)
{
nir_shader *shader = rzalloc(mem_ctx, nir_shader);
ralloc_set_destructor(shader, nir_shader_destructor);
exec_list_make_empty(&shader->variables);
@ -339,7 +351,7 @@ void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
dest->reg.base_offset = src->reg.base_offset;
dest->reg.reg = src->reg.reg;
if (src->reg.indirect) {
dest->reg.indirect = ralloc(mem_ctx, nir_src);
dest->reg.indirect = malloc(sizeof(nir_src));
nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
} else {
dest->reg.indirect = NULL;
@ -357,7 +369,7 @@ void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
dest->reg.base_offset = src->reg.base_offset;
dest->reg.reg = src->reg.reg;
if (src->reg.indirect) {
dest->reg.indirect = ralloc(instr, nir_src);
dest->reg.indirect = malloc(sizeof(nir_src));
nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
} else {
dest->reg.indirect = NULL;
@ -567,10 +579,8 @@ nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op)
{
unsigned num_srcs = nir_op_infos[op].num_inputs;
/* TODO: don't use rzalloc */
nir_alu_instr *instr =
rzalloc_size(shader,
sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
/* TODO: don't use calloc */
nir_alu_instr *instr = calloc(1, sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
instr_init(&instr->instr, nir_instr_type_alu);
instr->op = op;
@ -586,8 +596,7 @@ nir_alu_instr_create(nir_shader *shader, nir_op op)
nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
{
nir_deref_instr *instr =
rzalloc_size(shader, sizeof(nir_deref_instr));
nir_deref_instr *instr = calloc(1, sizeof(*instr));
instr_init(&instr->instr, nir_instr_type_deref);
@ -609,7 +618,7 @@ nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
nir_jump_instr *instr = malloc(sizeof(*instr));
instr_init(&instr->instr, nir_instr_type_jump);
src_init(&instr->condition);
instr->type = type;
@ -626,7 +635,7 @@ nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
unsigned bit_size)
{
nir_load_const_instr *instr =
rzalloc_size(shader, sizeof(*instr) + num_components * sizeof(*instr->value));
calloc(1, sizeof(*instr) + num_components * sizeof(*instr->value));
instr_init(&instr->instr, nir_instr_type_load_const);
nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);
@ -640,10 +649,9 @@ nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
/* TODO: don't use rzalloc */
/* TODO: don't use calloc */
nir_intrinsic_instr *instr =
rzalloc_size(shader,
sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
calloc(1, sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
instr_init(&instr->instr, nir_instr_type_intrinsic);
instr->intrinsic = op;
@ -664,8 +672,7 @@ nir_call_instr_create(nir_shader *shader, nir_function *callee)
{
const unsigned num_params = callee->num_params;
nir_call_instr *instr =
rzalloc_size(shader, sizeof(*instr) +
num_params * sizeof(instr->params[0]));
calloc(1, sizeof(*instr) + num_params * sizeof(instr->params[0]));
instr_init(&instr->instr, nir_instr_type_call);
instr->callee = callee;
@ -689,13 +696,13 @@ static int8_t default_tg4_offsets[4][2] =
nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
nir_tex_instr *instr = rzalloc(shader, nir_tex_instr);
nir_tex_instr *instr = calloc(1, sizeof(*instr));
instr_init(&instr->instr, nir_instr_type_tex);
dest_init(&instr->dest);
instr->num_srcs = num_srcs;
instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
instr->src = malloc(sizeof(nir_tex_src) * num_srcs);
for (unsigned i = 0; i < num_srcs; i++)
src_init(&instr->src[i].src);
@ -713,7 +720,7 @@ nir_tex_instr_add_src(nir_tex_instr *tex,
nir_tex_src_type src_type,
nir_src src)
{
nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
nir_tex_src *new_srcs = calloc(sizeof(*new_srcs),
tex->num_srcs + 1);
for (unsigned i = 0; i < tex->num_srcs; i++) {
@ -722,7 +729,7 @@ nir_tex_instr_add_src(nir_tex_instr *tex,
&tex->src[i].src);
}
ralloc_free(tex->src);
free(tex->src);
tex->src = new_srcs;
tex->src[tex->num_srcs].src_type = src_type;
@ -758,7 +765,7 @@ nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
{
nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
nir_phi_instr *instr = malloc(sizeof(*instr));
instr_init(&instr->instr, nir_instr_type_phi);
dest_init(&instr->dest);
@ -782,7 +789,7 @@ nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src)
{
nir_phi_src *phi_src;
phi_src = rzalloc(instr, nir_phi_src);
phi_src = calloc(1, sizeof(nir_phi_src));
phi_src->pred = pred;
phi_src->src = src;
phi_src->src.parent_instr = &instr->instr;
@ -794,7 +801,7 @@ nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src)
nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
{
nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
nir_parallel_copy_instr *instr = malloc(sizeof(*instr));
instr_init(&instr->instr, nir_instr_type_parallel_copy);
exec_list_make_empty(&instr->entries);
@ -809,7 +816,7 @@ nir_ssa_undef_instr_create(nir_shader *shader,
unsigned num_components,
unsigned bit_size)
{
nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
nir_ssa_undef_instr *instr = malloc(sizeof(*instr));
instr_init(&instr->instr, nir_instr_type_ssa_undef);
nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);
@ -1115,10 +1122,50 @@ void nir_instr_remove_v(nir_instr *instr)
}
}
static bool nir_instr_free_src_indirects(nir_src *src, void *state)
{
if (!src->is_ssa && src->reg.indirect) {
assert(src->reg.indirect->is_ssa || !src->reg.indirect->reg.indirect);
free(src->reg.indirect);
src->reg.indirect = NULL;
}
return true;
}
static bool nir_instr_free_dest_indirects(nir_dest *dest, void *state)
{
if (!dest->is_ssa && dest->reg.indirect) {
assert(dest->reg.indirect->is_ssa || !dest->reg.indirect->reg.indirect);
free(dest->reg.indirect);
dest->reg.indirect = NULL;
}
return true;
}
void nir_instr_free(nir_instr *instr)
{
nir_foreach_src(instr, nir_instr_free_src_indirects, NULL);
nir_foreach_dest(instr, nir_instr_free_dest_indirects, NULL);
switch (instr->type) {
case nir_instr_type_tex:
free(nir_instr_as_tex(instr)->src);
break;
case nir_instr_type_phi: {
nir_phi_instr *phi = nir_instr_as_phi(instr);
nir_foreach_phi_src_safe(phi_src, phi) {
free(phi_src);
}
break;
}
default:
break;
}
list_del(&instr->gc_node);
ralloc_free(instr);
free(instr);
}
void

View File

@ -243,7 +243,7 @@ __clone_src(clone_state *state, void *ninstr_or_if,
} else {
nsrc->reg.reg = remap_reg(state, src->reg.reg);
if (src->reg.indirect) {
nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src);
nsrc->reg.indirect = malloc(sizeof(nir_src));
__clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect);
}
nsrc->reg.base_offset = src->reg.base_offset;
@ -263,7 +263,7 @@ __clone_dst(clone_state *state, nir_instr *ninstr,
} else {
ndst->reg.reg = remap_reg(state, dst->reg.reg);
if (dst->reg.indirect) {
ndst->reg.indirect = ralloc(ninstr, nir_src);
ndst->reg.indirect = malloc(sizeof(nir_src));
__clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect);
}
ndst->reg.base_offset = dst->reg.base_offset;
@ -790,6 +790,10 @@ nir_shader_replace(nir_shader *dst, nir_shader *src)
ralloc_adopt(dead_ctx, dst);
ralloc_free(dead_ctx);
list_for_each_entry_safe(nir_instr, instr, &dst->gc_list, gc_node) {
nir_instr_free(instr);
}
/* Re-parent all of src's ralloc children to dst */
ralloc_adopt(dst, src);

View File

@ -440,6 +440,7 @@ remove_phi_src(nir_block *block, nir_block *pred)
if (src->pred == pred) {
list_del(&src->src.use_link);
exec_node_remove(&src->node);
free(src);
}
}
}

View File

@ -58,7 +58,7 @@ lower_cube_size(nir_builder *b, nir_intrinsic_instr *intrin)
nir_ssa_def *vec = nir_vec(b, comps, intrin->dest.ssa.num_components);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, vec);
nir_instr_remove(&intrin->instr);
ralloc_free(&intrin->instr);
nir_instr_free(&intrin->instr);
}
static bool

View File

@ -159,7 +159,7 @@ get_deref_reg_src(nir_deref_instr *deref, struct locals_to_regs_state *state)
if (src.reg.indirect) {
assert(src.reg.base_offset == 0);
} else {
src.reg.indirect = ralloc(b->shader, nir_src);
src.reg.indirect = malloc(sizeof(nir_src));
*src.reg.indirect =
nir_src_for_ssa(nir_imm_int(b, src.reg.base_offset));
src.reg.base_offset = 0;

View File

@ -550,7 +550,7 @@ read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
src->reg.base_offset = blob_read_uint32(ctx->blob);
if (header.any.is_indirect) {
src->reg.indirect = ralloc(mem_ctx, nir_src);
src->reg.indirect = malloc(sizeof(nir_src));
read_src(ctx, src->reg.indirect, mem_ctx);
} else {
src->reg.indirect = NULL;
@ -770,7 +770,7 @@ read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
dst->reg.reg = read_object(ctx);
dst->reg.base_offset = blob_read_uint32(ctx->blob);
if (dest.reg.is_indirect) {
dst->reg.indirect = ralloc(instr, nir_src);
dst->reg.indirect = malloc(sizeof(nir_src));
read_src(ctx, dst->reg.indirect, instr);
}
}

View File

@ -76,8 +76,6 @@ sweep_block(nir_shader *nir, nir_block *block)
list_del(&instr->gc_node);
list_add(&instr->gc_node, &nir->gc_list);
ralloc_steal(nir, instr);
nir_foreach_src(instr, sweep_src_indirect, nir);
nir_foreach_dest(instr, sweep_dest_indirect, nir);
}
@ -179,11 +177,7 @@ nir_sweep(nir_shader *nir)
sweep_function(nir, func);
}
/* Manually GCed instrs now before ralloc_free()ing the other rubbish, to
* ensure that the shader's GC list was maintained without ralloc_freeing any
* instrs behind our back. Note that the instr free routine will remove it
* from the list.
*/
/* Sweep instrs not found while walking the shader. */
list_for_each_entry_safe(nir_instr, instr, &instr_gc_list, gc_node) {
nir_instr_free(instr);
}

View File

@ -823,7 +823,7 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
dest.saturate = false;
if (tgsi_dst->Indirect && (tgsi_dst->File != TGSI_FILE_TEMPORARY)) {
nir_src *indirect = ralloc(c->build.shader, nir_src);
nir_src *indirect = malloc(sizeof(nir_src));
*indirect = nir_src_for_ssa(ttn_src_for_indirect(c, &tgsi_fdst->Indirect));
dest.dest.reg.indirect = indirect;
}