radeonsi: update the VS shader key in set & bind functions and remove memsets

This decreases overhead of si_update_shaders and overall driver overhead.

The VS shader key portion related to VS inputs is updated in set & bind
functions. Other fields related to outputs are still updated
in si_shader_selector_key.

Now that all modified fields are set to 0 when not needed, remove
the memsets.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12343>
This commit is contained in:
Marek Olšák 2021-08-10 06:16:07 -04:00 committed by Marge Bot
parent 74a0c9bd51
commit aed93eb991
3 changed files with 47 additions and 30 deletions

View File

@ -4890,8 +4890,10 @@ static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
* src_offset alignment, which is reflected in fix_fetch_opencode. */
old->fix_fetch_opencode != v->fix_fetch_opencode ||
memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) *
MAX2(old->count, v->count)))
MAX2(old->count, v->count))) {
si_vs_key_update_inputs(sctx);
sctx->do_update_shaders = true;
}
if (v->instance_divisor_is_fetched) {
struct pipe_constant_buffer cb;
@ -4987,8 +4989,10 @@ static void si_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot,
* be the case in well-behaved applications anyway.
*/
if ((sctx->vertex_elements->vb_alignment_check_mask &
(unaligned | orig_unaligned) & updated_mask))
(unaligned | orig_unaligned) & updated_mask)) {
si_vs_key_update_inputs(sctx);
sctx->do_update_shaders = true;
}
}
/*

View File

@ -578,8 +578,9 @@ int si_shader_select_with_key(struct si_screen *sscreen, struct si_shader_ctx_st
struct si_compiler_ctx_state *compiler_state,
const struct si_shader_key *key, int thread_index,
bool optimized_or_none);
void si_shader_selector_key_vs(struct si_context *sctx, struct si_shader_selector *vs,
struct si_shader_key *key, struct si_vs_prolog_bits *prolog_key);
void si_vs_key_update_inputs(struct si_context *sctx);
void si_get_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key,
struct si_vs_prolog_bits *prolog_key);
unsigned si_get_input_prim(const struct si_shader_selector *gs);
bool si_update_ngg(struct si_context *sctx);
void si_ps_key_update_framebuffer(struct si_context *sctx);

View File

@ -1785,16 +1785,31 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader
}
}
void si_shader_selector_key_vs(struct si_context *sctx, struct si_shader_selector *vs,
struct si_shader_key *key, struct si_vs_prolog_bits *prolog_key)
static void si_clear_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key,
struct si_vs_prolog_bits *prolog_key)
{
if (vs->info.base.vs.blit_sgprs_amd)
prolog_key->instance_divisor_is_one = 0;
prolog_key->instance_divisor_is_fetched = 0;
key->mono.vs_fetch_opencode = 0;
memset(key->mono.vs_fix_fetch, 0, sizeof(key->mono.vs_fix_fetch));
}
void si_vs_key_update_inputs(struct si_context *sctx)
{
struct si_shader_selector *vs = sctx->shader.vs.cso;
struct si_vertex_elements *elts = sctx->vertex_elements;
struct si_shader_key *key = &sctx->shader.vs.key;
if (!vs)
return;
struct si_vertex_elements *elts = sctx->vertex_elements;
if (vs->info.base.vs.blit_sgprs_amd) {
si_clear_vs_key_inputs(sctx, key, &key->part.vs.prolog);
return;
}
prolog_key->instance_divisor_is_one = elts->instance_divisor_is_one;
prolog_key->instance_divisor_is_fetched = elts->instance_divisor_is_fetched;
key->part.vs.prolog.instance_divisor_is_one = elts->instance_divisor_is_one;
key->part.vs.prolog.instance_divisor_is_fetched = elts->instance_divisor_is_fetched;
unsigned count_mask = (1 << vs->info.num_inputs) - 1;
unsigned fix = elts->fix_fetch_always & count_mask;
@ -1815,6 +1830,8 @@ void si_shader_selector_key_vs(struct si_context *sctx, struct si_shader_selecto
}
}
memset(key->mono.vs_fix_fetch, 0, sizeof(key->mono.vs_fix_fetch));
while (fix) {
unsigned i = u_bit_scan(&fix);
key->mono.vs_fix_fetch[i].bits = elts->fix_fetch[i];
@ -1822,6 +1839,17 @@ void si_shader_selector_key_vs(struct si_context *sctx, struct si_shader_selecto
key->mono.vs_fetch_opencode = opencode;
}
void si_get_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key,
struct si_vs_prolog_bits *prolog_key)
{
prolog_key->instance_divisor_is_one = sctx->shader.vs.key.part.vs.prolog.instance_divisor_is_one;
prolog_key->instance_divisor_is_fetched = sctx->shader.vs.key.part.vs.prolog.instance_divisor_is_fetched;
key->mono.vs_fetch_opencode = sctx->shader.vs.key.mono.vs_fetch_opencode;
memcpy(key->mono.vs_fix_fetch, sctx->shader.vs.key.mono.vs_fix_fetch,
sizeof(key->mono.vs_fix_fetch));
}
static void si_get_vs_key_outputs(struct si_context *sctx, struct si_shader_selector *vs,
struct si_shader_key *key)
{
@ -2118,24 +2146,14 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_sh
switch (sel->info.stage) {
case MESA_SHADER_VERTEX:
memset(&key->part, 0, sizeof(key->part));
memset(&key->mono, 0, sizeof(key->mono));
memset(&key->opt, 0, sizeof(key->opt));
si_shader_selector_key_vs(sctx, sel, key, &key->part.vs.prolog);
if (!sctx->shader.tes.cso && !sctx->shader.gs.cso)
si_get_vs_key_outputs(sctx, sel, key);
else
si_clear_vs_key_outputs(sctx, sel, key);
break;
case MESA_SHADER_TESS_CTRL:
memset(&key->part, 0, sizeof(key->part));
memset(&key->mono, 0, sizeof(key->mono));
memset(&key->opt, 0, sizeof(key->opt));
if (sctx->chip_class >= GFX9) {
si_shader_selector_key_vs(sctx, sctx->shader.vs.cso, key, &key->part.tcs.ls_prolog);
si_get_vs_key_inputs(sctx, key, &key->part.tcs.ls_prolog);
key->part.tcs.ls = sctx->shader.vs.cso;
/* When the LS VGPR fix is needed, monolithic shaders
@ -2164,25 +2182,18 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_sh
key->mono.u.ff_tcs_inputs_to_copy = sctx->shader.vs.cso->outputs_written;
break;
case MESA_SHADER_TESS_EVAL:
memset(&key->part, 0, sizeof(key->part));
memset(&key->mono, 0, sizeof(key->mono));
memset(&key->opt, 0, sizeof(key->opt));
if (!sctx->shader.gs.cso)
si_get_vs_key_outputs(sctx, sel, key);
else
si_clear_vs_key_outputs(sctx, sel, key);
break;
case MESA_SHADER_GEOMETRY:
memset(&key->part, 0, sizeof(key->part));
memset(&key->mono, 0, sizeof(key->mono));
memset(&key->opt, 0, sizeof(key->opt));
if (sctx->chip_class >= GFX9) {
if (sctx->shader.tes.cso) {
si_clear_vs_key_inputs(sctx, key, &key->part.gs.vs_prolog);
key->part.gs.es = sctx->shader.tes.cso;
} else {
si_shader_selector_key_vs(sctx, sctx->shader.vs.cso, key, &key->part.gs.vs_prolog);
si_get_vs_key_inputs(sctx, key, &key->part.gs.vs_prolog);
key->part.gs.es = sctx->shader.vs.cso;
}
@ -3194,6 +3205,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant, si_get_vs(sctx)->cso,
si_get_vs(sctx)->current);
si_update_rasterized_prim(sctx);
si_vs_key_update_inputs(sctx);
}
static void si_update_tess_uses_prim_id(struct si_context *sctx)