radeonsi: allow changing the NGG subgroup size to 256 but don't change it yet
Currently, 128 seems to have the best performance. Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10813>
This commit is contained in:
parent
13acbaecd8
commit
9dc7fff448
|
@ -796,7 +796,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out
|
|||
struct si_shader_selector *sel = shader->selector;
|
||||
struct si_shader_info *info = &sel->info;
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
unsigned subgroup_size = 128;
|
||||
unsigned subgroup_size = ctx->screen->ngg_subgroup_size;
|
||||
unsigned max_waves = ctx->ac.wave_size == 64 ? DIV_ROUND_UP(subgroup_size, 64) :
|
||||
DIV_ROUND_UP(subgroup_size, 32);
|
||||
|
||||
|
@ -2018,18 +2018,15 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
|
|||
/* All these are per subgroup: */
|
||||
const unsigned min_esverts = gs_sel->screen->info.chip_class >= GFX10_3 ? 29 : 24;
|
||||
bool max_vert_out_per_gs_instance = false;
|
||||
unsigned max_gsprims_base = 128; /* default prim group size clamp */
|
||||
unsigned max_esverts_base = 128;
|
||||
unsigned max_gsprims_base = gs_sel->screen->ngg_subgroup_size; /* default prim group size clamp */
|
||||
unsigned max_esverts_base = gs_sel->screen->ngg_subgroup_size;
|
||||
|
||||
if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST) {
|
||||
/* Exactly 1 wave32 executes culling in primitive threads (there is no
|
||||
* divergence), other waves are idle.
|
||||
*/
|
||||
max_gsprims_base = 32;
|
||||
/* All lanes are filled in wave32. */
|
||||
max_gsprims_base = ROUND_DOWN_TO(max_gsprims_base / 3, 32);
|
||||
max_esverts_base = max_gsprims_base * 3;
|
||||
} else if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP) {
|
||||
max_gsprims_base = 126;
|
||||
max_esverts_base = 128;
|
||||
max_gsprims_base = max_esverts_base - 2;
|
||||
}
|
||||
|
||||
if (gs_stage == MESA_SHADER_GEOMETRY) {
|
||||
|
|
|
@ -1300,6 +1300,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
|||
}
|
||||
}
|
||||
|
||||
sscreen->ngg_subgroup_size = 128;
|
||||
sscreen->ge_wave_size = 64;
|
||||
sscreen->ps_wave_size = 64;
|
||||
sscreen->compute_wave_size = 64;
|
||||
|
|
|
@ -665,6 +665,7 @@ struct si_screen {
|
|||
unsigned compute_wave_size;
|
||||
unsigned ps_wave_size;
|
||||
unsigned ge_wave_size;
|
||||
unsigned ngg_subgroup_size;
|
||||
};
|
||||
|
||||
struct si_sampler_view {
|
||||
|
|
Loading…
Reference in New Issue