radeonsi: precompute si_vgt_stages_key for NGG in si_shader
to remove this overhead from si_update_shaders.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12343>
This commit is contained in:
parent
5a131566b1
commit
7a20110ad3
|
@ -837,35 +837,6 @@ union si_vgt_param_key {
|
||||||
uint16_t index;
|
uint16_t index;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define SI_NUM_VGT_STAGES_KEY_BITS 6
|
|
||||||
#define SI_NUM_VGT_STAGES_STATES (1 << SI_NUM_VGT_STAGES_KEY_BITS)
|
|
||||||
|
|
||||||
/* The VGT_SHADER_STAGES key used to index the table of precomputed values.
|
|
||||||
* Some fields are set by state-change calls, most are set by draw_vbo.
|
|
||||||
*/
|
|
||||||
union si_vgt_stages_key {
|
|
||||||
struct {
|
|
||||||
#if UTIL_ARCH_LITTLE_ENDIAN
|
|
||||||
uint8_t tess : 1;
|
|
||||||
uint8_t gs : 1;
|
|
||||||
uint8_t ngg_gs_fast_launch : 1;
|
|
||||||
uint8_t ngg_passthrough : 1;
|
|
||||||
uint8_t ngg : 1; /* gfx10+ */
|
|
||||||
uint8_t streamout : 1; /* only used with NGG */
|
|
||||||
uint8_t _pad : 8 - SI_NUM_VGT_STAGES_KEY_BITS;
|
|
||||||
#else /* UTIL_ARCH_BIG_ENDIAN */
|
|
||||||
uint8_t _pad : 8 - SI_NUM_VGT_STAGES_KEY_BITS;
|
|
||||||
uint8_t streamout : 1;
|
|
||||||
uint8_t ngg : 1;
|
|
||||||
uint8_t ngg_passthrough : 1;
|
|
||||||
uint8_t ngg_gs_fast_launch : 1;
|
|
||||||
uint8_t gs : 1;
|
|
||||||
uint8_t tess : 1;
|
|
||||||
#endif
|
|
||||||
} u;
|
|
||||||
uint8_t index;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct si_texture_handle {
|
struct si_texture_handle {
|
||||||
unsigned desc_slot;
|
unsigned desc_slot;
|
||||||
bool desc_dirty;
|
bool desc_dirty;
|
||||||
|
|
|
@ -729,6 +729,35 @@ struct gfx9_gs_info {
|
||||||
unsigned esgs_ring_size; /* in bytes */
|
unsigned esgs_ring_size; /* in bytes */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define SI_NUM_VGT_STAGES_KEY_BITS 6
|
||||||
|
#define SI_NUM_VGT_STAGES_STATES (1 << SI_NUM_VGT_STAGES_KEY_BITS)
|
||||||
|
|
||||||
|
/* The VGT_SHADER_STAGES key used to index the table of precomputed values.
|
||||||
|
* Some fields are set by state-change calls, most are set by draw_vbo.
|
||||||
|
*/
|
||||||
|
union si_vgt_stages_key {
|
||||||
|
struct {
|
||||||
|
#if UTIL_ARCH_LITTLE_ENDIAN
|
||||||
|
uint8_t tess : 1;
|
||||||
|
uint8_t gs : 1;
|
||||||
|
uint8_t ngg_gs_fast_launch : 1;
|
||||||
|
uint8_t ngg_passthrough : 1;
|
||||||
|
uint8_t ngg : 1; /* gfx10+ */
|
||||||
|
uint8_t streamout : 1; /* only used with NGG */
|
||||||
|
uint8_t _pad : 8 - SI_NUM_VGT_STAGES_KEY_BITS;
|
||||||
|
#else /* UTIL_ARCH_BIG_ENDIAN */
|
||||||
|
uint8_t _pad : 8 - SI_NUM_VGT_STAGES_KEY_BITS;
|
||||||
|
uint8_t streamout : 1;
|
||||||
|
uint8_t ngg : 1;
|
||||||
|
uint8_t ngg_passthrough : 1;
|
||||||
|
uint8_t ngg_gs_fast_launch : 1;
|
||||||
|
uint8_t gs : 1;
|
||||||
|
uint8_t tess : 1;
|
||||||
|
#endif
|
||||||
|
} u;
|
||||||
|
uint8_t index;
|
||||||
|
};
|
||||||
|
|
||||||
struct si_shader {
|
struct si_shader {
|
||||||
struct si_compiler_ctx_state compiler_ctx_state;
|
struct si_compiler_ctx_state compiler_ctx_state;
|
||||||
|
|
||||||
|
@ -812,6 +841,7 @@ struct si_shader {
|
||||||
unsigned pa_cl_ngg_cntl;
|
unsigned pa_cl_ngg_cntl;
|
||||||
unsigned vgt_gs_max_vert_out; /* for API GS */
|
unsigned vgt_gs_max_vert_out; /* for API GS */
|
||||||
unsigned ge_pc_alloc; /* uconfig register */
|
unsigned ge_pc_alloc; /* uconfig register */
|
||||||
|
union si_vgt_stages_key vgt_stages;
|
||||||
} ngg;
|
} ngg;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
|
|
|
@ -178,16 +178,8 @@ static bool si_update_shaders(struct si_context *sctx)
|
||||||
key.u.tess = 1;
|
key.u.tess = 1;
|
||||||
if (HAS_GS)
|
if (HAS_GS)
|
||||||
key.u.gs = 1;
|
key.u.gs = 1;
|
||||||
|
if (NGG)
|
||||||
if (NGG) {
|
key.index |= si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->ctx_reg.ngg.vgt_stages.index;
|
||||||
struct si_shader *vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current;
|
|
||||||
|
|
||||||
key.u.ngg = 1;
|
|
||||||
key.u.streamout = !!si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->cso->so.num_outputs;
|
|
||||||
/* These must be done after the shader variant is selected. */
|
|
||||||
key.u.ngg_passthrough = gfx10_is_ngg_passthrough(vs);
|
|
||||||
key.u.ngg_gs_fast_launch = !!(vs->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct si_pm4_state **pm4 = &sctx->vgt_shader_config[key.index];
|
struct si_pm4_state **pm4 = &sctx->vgt_shader_config[key.index];
|
||||||
if (unlikely(!*pm4))
|
if (unlikely(!*pm4))
|
||||||
|
|
|
@ -1346,6 +1346,12 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
||||||
S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
|
S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
|
||||||
S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1);
|
S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
shader->ctx_reg.ngg.vgt_stages.u.ngg = 1;
|
||||||
|
/* streamout is a 1-bit field: normalize the output count to 0/1, otherwise an even count truncates to 0. */
shader->ctx_reg.ngg.vgt_stages.u.streamout = !!gs_sel->so.num_outputs;
|
||||||
|
shader->ctx_reg.ngg.vgt_stages.u.ngg_passthrough = gfx10_is_ngg_passthrough(shader);
|
||||||
|
shader->ctx_reg.ngg.vgt_stages.u.ngg_gs_fast_launch =
|
||||||
|
!!(shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void si_emit_shader_vs(struct si_context *sctx)
|
static void si_emit_shader_vs(struct si_context *sctx)
|
||||||
|
|
Loading…
Reference in New Issue