radeonsi: simplify the NGG culling condition in si_draw_vbo
Changes:
- disallow NGG culling for GS and fast launch for tess, using template args
  (GS can't do NGG culling, tess can't do fast launch)
- skip checking current_rast_prim with tessellation
  (bake the condition into ngg_cull_vert_threshold)
- use only 1 vertex count threshold for enabling NGG shader culling to
  simplify it. I think it doesn't have a big impact. The threshold
  computation depends on more parameters than just fast launch.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8434>
This commit is contained in:
parent
7581743510
commit
ffbf3a5f8b
|
@ -449,7 +449,6 @@ struct si_shader_selector {
|
|||
ubyte num_vbos_in_user_sgprs;
|
||||
unsigned pa_cl_vs_out_cntl;
|
||||
unsigned ngg_cull_vert_threshold; /* UINT32_MAX = disabled */
|
||||
unsigned ngg_cull_nonindexed_fast_launch_vert_threshold; /* UINT32_MAX = disabled */
|
||||
ubyte clipdist_mask;
|
||||
ubyte culldist_mask;
|
||||
enum pipe_prim_type rast_prim;
|
||||
|
|
|
@ -1974,23 +1974,22 @@ static void si_draw_vbo(struct pipe_context *ctx,
|
|||
/* Update NGG culling settings. */
|
||||
uint8_t old_ngg_culling = sctx->ngg_culling;
|
||||
if (GFX_VERSION >= GFX10) {
|
||||
struct si_shader_selector *hw_vs;
|
||||
if (NGG && !dispatch_prim_discard_cs && sctx->current_rast_prim == PIPE_PRIM_TRIANGLES &&
|
||||
(hw_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->cso) &&
|
||||
(total_direct_count > hw_vs->ngg_cull_vert_threshold ||
|
||||
(!index_size &&
|
||||
total_direct_count > hw_vs->ngg_cull_nonindexed_fast_launch_vert_threshold &&
|
||||
prim & ((1 << PIPE_PRIM_TRIANGLES) |
|
||||
(1 << PIPE_PRIM_TRIANGLE_STRIP))))) {
|
||||
struct si_shader_selector *hw_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->cso;
|
||||
|
||||
if (NGG && !HAS_GS && !dispatch_prim_discard_cs &&
|
||||
/* Tessellation sets ngg_cull_vert_threshold to UINT_MAX if the prim type
|
||||
* is not triangles, so this check is only needed without tessellation. */
|
||||
(HAS_TESS || sctx->current_rast_prim == PIPE_PRIM_TRIANGLES) &&
|
||||
total_direct_count > hw_vs->ngg_cull_vert_threshold) {
|
||||
uint8_t ngg_culling = sctx->viewport0_y_inverted ? rs->ngg_cull_flags_y_inverted :
|
||||
rs->ngg_cull_flags;
|
||||
|
||||
/* Use NGG fast launch for certain primitive types.
|
||||
* A draw must have at least 1 full primitive.
|
||||
* The fast launch doesn't work with tessellation.
|
||||
*/
|
||||
if (ngg_culling &&
|
||||
hw_vs->ngg_cull_nonindexed_fast_launch_vert_threshold < UINT32_MAX &&
|
||||
min_direct_count >= 3 && !HAS_TESS && !HAS_GS) {
|
||||
if (!HAS_TESS && ngg_culling && min_direct_count >= 3 &&
|
||||
!(sctx->screen->debug_flags & DBG(NO_FAST_LAUNCH))) {
|
||||
if (prim == PIPE_PRIM_TRIANGLES && !index_size) {
|
||||
ngg_culling |= SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST;
|
||||
} else if (prim == PIPE_PRIM_TRIANGLE_STRIP) {
|
||||
|
|
|
@ -2798,16 +2798,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
|||
!sel->info.base.vs.window_space_position));
|
||||
|
||||
sel->ngg_cull_vert_threshold = UINT_MAX; /* disabled (changed below) */
|
||||
sel->ngg_cull_nonindexed_fast_launch_vert_threshold = UINT_MAX;
|
||||
|
||||
if (ngg_culling_allowed) {
|
||||
if (sel->info.stage == MESA_SHADER_VERTEX) {
|
||||
/* 1000 non-indexed vertices (roughly 8 primgroups) are needed
|
||||
* per draw call (no TES/GS) to enable NGG culling by default.
|
||||
*/
|
||||
if (!(sscreen->debug_flags & DBG(NO_FAST_LAUNCH)))
|
||||
sel->ngg_cull_nonindexed_fast_launch_vert_threshold = 1000;
|
||||
|
||||
if (sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL))
|
||||
sel->ngg_cull_vert_threshold = 0; /* always enabled */
|
||||
else if (sscreen->options.shader_culling ||
|
||||
|
@ -2816,9 +2809,10 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
|||
sscreen->info.is_pro_graphics))
|
||||
sel->ngg_cull_vert_threshold = 1500; /* vertex count must be more than this */
|
||||
} else if (sel->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
if (sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL) ||
|
||||
sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_TESS) ||
|
||||
sscreen->info.chip_class == GFX10_3)
|
||||
if (sel->rast_prim == PIPE_PRIM_TRIANGLES &&
|
||||
(sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL) ||
|
||||
sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_TESS) ||
|
||||
sscreen->info.chip_class == GFX10_3))
|
||||
sel->ngg_cull_vert_threshold = 0; /* always enabled */
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue