From aed93eb991a2b5fe61b764259ad5d524ff3ca103 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 10 Aug 2021 06:16:07 -0400 Subject: [PATCH] radeonsi: update the VS shader key in set & bind functions and remove memsets This decreases overhead of si_update_shaders and overall driver overhead. The VS shader key portion related to VS inputs is updated in set & bind functions. Other fields related to outputs are still updated in si_shader_selector_key. Now that all modified fields are set to 0 when not needed, and remove the memsets. Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_state.c | 8 ++- src/gallium/drivers/radeonsi/si_state.h | 5 +- .../drivers/radeonsi/si_state_shaders.c | 64 +++++++++++-------- 3 files changed, 47 insertions(+), 30 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index cb4bf2d8ebf..cf63ddd333a 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -4890,8 +4890,10 @@ static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) * src_offset alignment, which is reflected in fix_fetch_opencode. */ old->fix_fetch_opencode != v->fix_fetch_opencode || memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * - MAX2(old->count, v->count))) + MAX2(old->count, v->count))) { + si_vs_key_update_inputs(sctx); sctx->do_update_shaders = true; + } if (v->instance_divisor_is_fetched) { struct pipe_constant_buffer cb; @@ -4987,8 +4989,10 @@ static void si_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot, * be the case in well-behaved applications anyway. */ if ((sctx->vertex_elements->vb_alignment_check_mask & - (unaligned | orig_unaligned) & updated_mask)) + (unaligned | orig_unaligned) & updated_mask)) { + si_vs_key_update_inputs(sctx); sctx->do_update_shaders = true; + } } /* diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 8d42a186bea..285b74dddc1 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -578,8 +578,9 @@ int si_shader_select_with_key(struct si_screen *sscreen, struct si_shader_ctx_st struct si_compiler_ctx_state *compiler_state, const struct si_shader_key *key, int thread_index, bool optimized_or_none); -void si_shader_selector_key_vs(struct si_context *sctx, struct si_shader_selector *vs, - struct si_shader_key *key, struct si_vs_prolog_bits *prolog_key); +void si_vs_key_update_inputs(struct si_context *sctx); +void si_get_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key, + struct si_vs_prolog_bits *prolog_key); unsigned si_get_input_prim(const struct si_shader_selector *gs); bool si_update_ngg(struct si_context *sctx); void si_ps_key_update_framebuffer(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 8369699746d..5d6667e6e57 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1785,16 +1785,31 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader } } -void si_shader_selector_key_vs(struct si_context *sctx, struct si_shader_selector *vs, - struct si_shader_key *key, struct si_vs_prolog_bits *prolog_key) +static void si_clear_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key, + struct si_vs_prolog_bits *prolog_key) { - if (vs->info.base.vs.blit_sgprs_amd) + prolog_key->instance_divisor_is_one = 0; + prolog_key->instance_divisor_is_fetched = 0; + key->mono.vs_fetch_opencode = 0; + memset(key->mono.vs_fix_fetch, 0, sizeof(key->mono.vs_fix_fetch)); +} + +void si_vs_key_update_inputs(struct si_context *sctx) +{ + struct si_shader_selector *vs = sctx->shader.vs.cso; + struct si_vertex_elements *elts = sctx->vertex_elements; + struct si_shader_key *key = &sctx->shader.vs.key; + + if (!vs) return; - struct si_vertex_elements *elts = sctx->vertex_elements; + if (vs->info.base.vs.blit_sgprs_amd) { + si_clear_vs_key_inputs(sctx, key, &key->part.vs.prolog); + return; + } - prolog_key->instance_divisor_is_one = elts->instance_divisor_is_one; - prolog_key->instance_divisor_is_fetched = elts->instance_divisor_is_fetched; + key->part.vs.prolog.instance_divisor_is_one = elts->instance_divisor_is_one; + key->part.vs.prolog.instance_divisor_is_fetched = elts->instance_divisor_is_fetched; unsigned count_mask = (1 << vs->info.num_inputs) - 1; unsigned fix = elts->fix_fetch_always & count_mask; @@ -1815,6 +1830,8 @@ void si_shader_selector_key_vs(struct si_context *sctx, struct si_shader_selecto } } + memset(key->mono.vs_fix_fetch, 0, sizeof(key->mono.vs_fix_fetch)); + while (fix) { unsigned i = u_bit_scan(&fix); key->mono.vs_fix_fetch[i].bits = elts->fix_fetch[i]; @@ -1822,6 +1839,17 @@ void si_shader_selector_key_vs(struct si_context *sctx, struct si_shader_selecto key->mono.vs_fetch_opencode = opencode; } +void si_get_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key, + struct si_vs_prolog_bits *prolog_key) +{ + prolog_key->instance_divisor_is_one = sctx->shader.vs.key.part.vs.prolog.instance_divisor_is_one; + prolog_key->instance_divisor_is_fetched = sctx->shader.vs.key.part.vs.prolog.instance_divisor_is_fetched; + + key->mono.vs_fetch_opencode = sctx->shader.vs.key.mono.vs_fetch_opencode; + memcpy(key->mono.vs_fix_fetch, sctx->shader.vs.key.mono.vs_fix_fetch, + sizeof(key->mono.vs_fix_fetch)); +} + static void si_get_vs_key_outputs(struct si_context *sctx, struct si_shader_selector *vs, struct si_shader_key *key) { @@ -2118,24 +2146,14 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_sh switch (sel->info.stage) { case MESA_SHADER_VERTEX: - memset(&key->part, 0, sizeof(key->part)); - memset(&key->mono, 0, sizeof(key->mono)); - memset(&key->opt, 0, sizeof(key->opt)); - - si_shader_selector_key_vs(sctx, sel, key, &key->part.vs.prolog); - if (!sctx->shader.tes.cso && !sctx->shader.gs.cso) si_get_vs_key_outputs(sctx, sel, key); else si_clear_vs_key_outputs(sctx, sel, key); break; case MESA_SHADER_TESS_CTRL: - memset(&key->part, 0, sizeof(key->part)); - memset(&key->mono, 0, sizeof(key->mono)); - memset(&key->opt, 0, sizeof(key->opt)); - if (sctx->chip_class >= GFX9) { - si_shader_selector_key_vs(sctx, sctx->shader.vs.cso, key, &key->part.tcs.ls_prolog); + si_get_vs_key_inputs(sctx, key, &key->part.tcs.ls_prolog); key->part.tcs.ls = sctx->shader.vs.cso; /* When the LS VGPR fix is needed, monolithic shaders @@ -2164,25 +2182,18 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_sh key->mono.u.ff_tcs_inputs_to_copy = sctx->shader.vs.cso->outputs_written; break; case MESA_SHADER_TESS_EVAL: - memset(&key->part, 0, sizeof(key->part)); - memset(&key->mono, 0, sizeof(key->mono)); - memset(&key->opt, 0, sizeof(key->opt)); - if (!sctx->shader.gs.cso) si_get_vs_key_outputs(sctx, sel, key); else si_clear_vs_key_outputs(sctx, sel, key); break; case MESA_SHADER_GEOMETRY: - memset(&key->part, 0, sizeof(key->part)); - memset(&key->mono, 0, sizeof(key->mono)); - memset(&key->opt, 0, sizeof(key->opt)); - if (sctx->chip_class >= GFX9) { if (sctx->shader.tes.cso) { + si_clear_vs_key_inputs(sctx, key, &key->part.gs.vs_prolog); key->part.gs.es = sctx->shader.tes.cso; } else { - si_shader_selector_key_vs(sctx, sctx->shader.vs.cso, key, &key->part.gs.vs_prolog); + si_get_vs_key_inputs(sctx, key, &key->part.gs.vs_prolog); key->part.gs.es = sctx->shader.vs.cso; } @@ -3194,6 +3205,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state) si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant, si_get_vs(sctx)->cso, si_get_vs(sctx)->current); si_update_rasterized_prim(sctx); + si_vs_key_update_inputs(sctx); } static void si_update_tess_uses_prim_id(struct si_context *sctx)