radeonsi: precompute si_vgt_stages_key for NGG in si_shader

to remove this overhead from si_update_shaders

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12343>
This commit is contained in:
Marek Olšák 2021-08-10 11:27:53 -04:00 committed by Marge Bot
parent 5a131566b1
commit 7a20110ad3
4 changed files with 38 additions and 39 deletions

View File

@ -837,35 +837,6 @@ union si_vgt_param_key {
uint16_t index;
};
/* Number of meaningful bits in si_vgt_stages_key: one per 1-bit flag below. */
#define SI_NUM_VGT_STAGES_KEY_BITS 6
/* Number of distinct key values (2^SI_NUM_VGT_STAGES_KEY_BITS). */
#define SI_NUM_VGT_STAGES_STATES (1 << SI_NUM_VGT_STAGES_KEY_BITS)
/* The VGT_SHADER_STAGES key used to index the table of precomputed values.
* Some fields are set by state-change calls, most are set by draw_vbo.
*
* The union overlays six 1-bit flags ("u") with a single byte ("index"), so
* the whole key can be read, OR-ed or used as an array index in one step.
* Every flag is exactly one bit wide: assign only 0 or 1 (normalize counts
* and masks with !! first), because wider values are truncated to their
* lowest bit.
*/
union si_vgt_stages_key {
struct {
/* NOTE(review): the two branches declare the same fields in mirrored order,
* presumably so that "index" has the same numeric value on both
* endiannesses - bit-field allocation order is implementation-defined.
*/
#if UTIL_ARCH_LITTLE_ENDIAN
uint8_t tess : 1;
uint8_t gs : 1;
uint8_t ngg_gs_fast_launch : 1;
uint8_t ngg_passthrough : 1;
uint8_t ngg : 1; /* gfx10+ */
uint8_t streamout : 1; /* only used with NGG */
uint8_t _pad : 8 - SI_NUM_VGT_STAGES_KEY_BITS; /* unused bits of the byte */
#else /* UTIL_ARCH_BIG_ENDIAN */
uint8_t _pad : 8 - SI_NUM_VGT_STAGES_KEY_BITS; /* unused bits of the byte */
uint8_t streamout : 1; /* only used with NGG */
uint8_t ngg : 1; /* gfx10+ */
uint8_t ngg_passthrough : 1;
uint8_t ngg_gs_fast_launch : 1;
uint8_t gs : 1;
uint8_t tess : 1;
#endif
} u;
/* All flags viewed as one byte. */
uint8_t index;
};
struct si_texture_handle {
unsigned desc_slot;
bool desc_dirty;

View File

@ -729,6 +729,35 @@ struct gfx9_gs_info {
unsigned esgs_ring_size; /* in bytes */
};
/* Number of meaningful bits in si_vgt_stages_key: one per 1-bit flag below. */
#define SI_NUM_VGT_STAGES_KEY_BITS 6
/* Number of distinct key values (2^SI_NUM_VGT_STAGES_KEY_BITS). */
#define SI_NUM_VGT_STAGES_STATES (1 << SI_NUM_VGT_STAGES_KEY_BITS)
/* The VGT_SHADER_STAGES key used to index the table of precomputed values.
* Some fields are set by state-change calls, most are set by draw_vbo.
*
* The union overlays six 1-bit flags ("u") with a single byte ("index"), so
* the whole key can be read, OR-ed or used as an array index in one step.
* A copy holding the NGG-related bits is also stored per shader (see the
* "vgt_stages" member of struct si_shader) so that it can be merged into a
* key through "index".
*
* Every flag is exactly one bit wide: assign only 0 or 1 (normalize counts
* and masks with !! first), because wider values are truncated to their
* lowest bit.
*/
union si_vgt_stages_key {
struct {
/* NOTE(review): the two branches declare the same fields in mirrored order,
* presumably so that "index" has the same numeric value on both
* endiannesses - bit-field allocation order is implementation-defined.
*/
#if UTIL_ARCH_LITTLE_ENDIAN
uint8_t tess : 1;
uint8_t gs : 1;
uint8_t ngg_gs_fast_launch : 1;
uint8_t ngg_passthrough : 1;
uint8_t ngg : 1; /* gfx10+ */
uint8_t streamout : 1; /* only used with NGG */
uint8_t _pad : 8 - SI_NUM_VGT_STAGES_KEY_BITS; /* unused bits of the byte */
#else /* UTIL_ARCH_BIG_ENDIAN */
uint8_t _pad : 8 - SI_NUM_VGT_STAGES_KEY_BITS; /* unused bits of the byte */
uint8_t streamout : 1; /* only used with NGG */
uint8_t ngg : 1; /* gfx10+ */
uint8_t ngg_passthrough : 1;
uint8_t ngg_gs_fast_launch : 1;
uint8_t gs : 1;
uint8_t tess : 1;
#endif
} u;
/* All flags viewed as one byte. */
uint8_t index;
};
struct si_shader {
struct si_compiler_ctx_state compiler_ctx_state;
@ -812,6 +841,7 @@ struct si_shader {
unsigned pa_cl_ngg_cntl;
unsigned vgt_gs_max_vert_out; /* for API GS */
unsigned ge_pc_alloc; /* uconfig register */
union si_vgt_stages_key vgt_stages;
} ngg;
struct {

View File

@ -178,16 +178,8 @@ static bool si_update_shaders(struct si_context *sctx)
key.u.tess = 1;
if (HAS_GS)
key.u.gs = 1;
if (NGG) {
struct si_shader *vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current;
key.u.ngg = 1;
key.u.streamout = !!si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->cso->so.num_outputs;
/* These must be done after the shader variant is selected. */
key.u.ngg_passthrough = gfx10_is_ngg_passthrough(vs);
key.u.ngg_gs_fast_launch = !!(vs->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL);
}
if (NGG)
key.index |= si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->ctx_reg.ngg.vgt_stages.index;
struct si_pm4_state **pm4 = &sctx->vgt_shader_config[key.index];
if (unlikely(!*pm4))

View File

@ -1346,6 +1346,12 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1);
}
/* Precompute the NGG bits of the VGT_SHADER_STAGES key for this shader
* variant so that they can be merged into a key through vgt_stages.index.
*/
shader->ctx_reg.ngg.vgt_stages.u.ngg = 1;
/* "streamout" is a 1-bit field: collapse the output count to 0/1 first.
* Assigning the raw count would keep only its lowest bit, so any even
* non-zero number of streamout outputs would wrongly clear the flag.
*/
shader->ctx_reg.ngg.vgt_stages.u.streamout = !!gs_sel->so.num_outputs;
shader->ctx_reg.ngg.vgt_stages.u.ngg_passthrough = gfx10_is_ngg_passthrough(shader);
shader->ctx_reg.ngg.vgt_stages.u.ngg_gs_fast_launch =
!!(shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL);
}
static void si_emit_shader_vs(struct si_context *sctx)