radeonsi: simplify the NGG culling condition in si_draw_vbo

Changes:

- disallow NGG culling for GS and NGG fast launch for tessellation,
  both via the HAS_GS / HAS_TESS template args
  (GS can't do NGG culling, tess can't do fast launch)

- skip checking current_rast_prim with tessellation
  (bake the condition into ngg_cull_vert_threshold)

- use only one vertex count threshold (ngg_cull_vert_threshold) for
  enabling NGG shader culling, to simplify the condition. I think this
  doesn't have a big impact. The threshold computation depends on more
  parameters than just fast launch.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8434>
This commit is contained in:
Marek Olšák 2021-01-14 07:49:11 -05:00 committed by Marge Bot
parent 7581743510
commit ffbf3a5f8b
3 changed files with 14 additions and 22 deletions

View File

@ -449,7 +449,6 @@ struct si_shader_selector {
ubyte num_vbos_in_user_sgprs;
unsigned pa_cl_vs_out_cntl;
unsigned ngg_cull_vert_threshold; /* UINT32_MAX = disabled */
unsigned ngg_cull_nonindexed_fast_launch_vert_threshold; /* UINT32_MAX = disabled */
ubyte clipdist_mask;
ubyte culldist_mask;
enum pipe_prim_type rast_prim;

View File

@ -1974,23 +1974,22 @@ static void si_draw_vbo(struct pipe_context *ctx,
/* Update NGG culling settings. */
uint8_t old_ngg_culling = sctx->ngg_culling;
if (GFX_VERSION >= GFX10) {
struct si_shader_selector *hw_vs;
if (NGG && !dispatch_prim_discard_cs && sctx->current_rast_prim == PIPE_PRIM_TRIANGLES &&
(hw_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->cso) &&
(total_direct_count > hw_vs->ngg_cull_vert_threshold ||
(!index_size &&
total_direct_count > hw_vs->ngg_cull_nonindexed_fast_launch_vert_threshold &&
prim & ((1 << PIPE_PRIM_TRIANGLES) |
(1 << PIPE_PRIM_TRIANGLE_STRIP))))) {
struct si_shader_selector *hw_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->cso;
if (NGG && !HAS_GS && !dispatch_prim_discard_cs &&
/* Tessellation sets ngg_cull_vert_threshold to UINT_MAX if the prim type
* is not triangles, so this check is only needed without tessellation. */
(HAS_TESS || sctx->current_rast_prim == PIPE_PRIM_TRIANGLES) &&
total_direct_count > hw_vs->ngg_cull_vert_threshold) {
uint8_t ngg_culling = sctx->viewport0_y_inverted ? rs->ngg_cull_flags_y_inverted :
rs->ngg_cull_flags;
/* Use NGG fast launch for certain primitive types.
* A draw must have at least 1 full primitive.
* The fast launch doesn't work with tessellation.
*/
if (ngg_culling &&
hw_vs->ngg_cull_nonindexed_fast_launch_vert_threshold < UINT32_MAX &&
min_direct_count >= 3 && !HAS_TESS && !HAS_GS) {
if (!HAS_TESS && ngg_culling && min_direct_count >= 3 &&
!(sctx->screen->debug_flags & DBG(NO_FAST_LAUNCH))) {
if (prim == PIPE_PRIM_TRIANGLES && !index_size) {
ngg_culling |= SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST;
} else if (prim == PIPE_PRIM_TRIANGLE_STRIP) {

View File

@ -2798,16 +2798,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
!sel->info.base.vs.window_space_position));
sel->ngg_cull_vert_threshold = UINT_MAX; /* disabled (changed below) */
sel->ngg_cull_nonindexed_fast_launch_vert_threshold = UINT_MAX;
if (ngg_culling_allowed) {
if (sel->info.stage == MESA_SHADER_VERTEX) {
/* 1000 non-indexed vertices (roughly 8 primgroups) are needed
* per draw call (no TES/GS) to enable NGG culling by default.
*/
if (!(sscreen->debug_flags & DBG(NO_FAST_LAUNCH)))
sel->ngg_cull_nonindexed_fast_launch_vert_threshold = 1000;
if (sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL))
sel->ngg_cull_vert_threshold = 0; /* always enabled */
else if (sscreen->options.shader_culling ||
@ -2816,9 +2809,10 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
sscreen->info.is_pro_graphics))
sel->ngg_cull_vert_threshold = 1500; /* vertex count must be more than this */
} else if (sel->info.stage == MESA_SHADER_TESS_EVAL) {
if (sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL) ||
sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_TESS) ||
sscreen->info.chip_class == GFX10_3)
if (sel->rast_prim == PIPE_PRIM_TRIANGLES &&
(sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL) ||
sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_TESS) ||
sscreen->info.chip_class == GFX10_3))
sel->ngg_cull_vert_threshold = 0; /* always enabled */
}
}