ac: Calculate workgroup sizes of HW stages that operate in workgroups.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12321>
This commit is contained in:
Timur Kristóf 2021-08-11 08:57:04 +02:00 committed by Marge Bot
parent 66b5f05727
commit 395c0c52c7
2 changed files with 84 additions and 0 deletions

View File

@ -25,6 +25,7 @@
#include "ac_gpu_info.h"
#include "sid.h"
#include "u_math.h"
#include <assert.h>
#include <stdlib.h>
@ -511,3 +512,72 @@ void ac_compute_late_alloc(const struct radeon_info *info, bool ngg, bool ngg_cu
else /* VS */
*late_alloc_wave64 = MIN2(*late_alloc_wave64, G_00B11C_LIMIT(~0u));
}
unsigned ac_compute_cs_workgroup_size(uint16_t sizes[3], bool variable, unsigned max)
{
if (variable)
return max;
return sizes[0] * sizes[1] * sizes[2];
}
unsigned ac_compute_lshs_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
unsigned tess_num_patches,
unsigned tess_patch_in_vtx,
unsigned tess_patch_out_vtx)
{
/* When tessellation is used, API VS runs on HW LS, API TCS runs on HW HS.
* These two HW stages are merged on GFX9+.
*/
bool merged_shaders = chip_class >= GFX9;
unsigned ls_workgroup_size = tess_num_patches * tess_patch_in_vtx;
unsigned hs_workgroup_size = tess_num_patches * tess_patch_out_vtx;
if (merged_shaders)
return MAX2(ls_workgroup_size, hs_workgroup_size);
else if (stage == MESA_SHADER_VERTEX)
return ls_workgroup_size;
else if (stage == MESA_SHADER_TESS_CTRL)
return hs_workgroup_size;
else
unreachable("invalid LSHS shader stage");
}
unsigned ac_compute_esgs_workgroup_size(enum chip_class chip_class, unsigned wave_size,
unsigned es_verts, unsigned gs_inst_prims)
{
/* ESGS may operate in workgroups if on-chip GS (LDS rings) are enabled.
*
* GFX6: Not possible in the HW.
* GFX7-8 (unmerged): possible in the HW, but not implemented in Mesa.
* GFX9+ (merged): implemented in Mesa.
*/
if (chip_class <= GFX8)
return wave_size;
unsigned workgroup_size = MAX2(es_verts, gs_inst_prims);
return CLAMP(workgroup_size, 1, 256);
}
unsigned ac_compute_ngg_workgroup_size(unsigned es_verts, unsigned gs_inst_prims,
unsigned max_vtx_out, unsigned prim_amp_factor)
{
/* NGG always operates in workgroups.
*
* For API VS/TES/GS:
* - 1 invocation per input vertex
* - 1 invocation per input primitive
*
* The same invocation can process both an input vertex and primitive,
* however 1 invocation can only output up to 1 vertex and 1 primitive.
*/
unsigned max_vtx_in = es_verts < 256 ? es_verts : 3 * gs_inst_prims;
unsigned max_prim_in = gs_inst_prims;
unsigned max_prim_out = gs_inst_prims * prim_amp_factor;
unsigned workgroup_size = MAX4(max_vtx_in, max_vtx_out, max_prim_in, max_prim_out);
return CLAMP(workgroup_size, 1, 256);
}

View File

@ -27,6 +27,7 @@
#include "ac_binary.h"
#include "amd_family.h"
#include "compiler/nir/nir.h"
#include "compiler/shader_enums.h"
#include <stdbool.h>
#include <stdint.h>
@ -104,6 +105,19 @@ void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype,
void ac_compute_late_alloc(const struct radeon_info *info, bool ngg, bool ngg_culling,
bool uses_scratch, unsigned *late_alloc_wave64, unsigned *cu_mask);
unsigned ac_compute_cs_workgroup_size(uint16_t sizes[3], bool variable, unsigned max);
unsigned ac_compute_lshs_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
unsigned tess_num_patches,
unsigned tess_patch_in_vtx,
unsigned tess_patch_out_vtx);
unsigned ac_compute_esgs_workgroup_size(enum chip_class chip_class, unsigned wave_size,
unsigned es_verts, unsigned gs_inst_prims);
unsigned ac_compute_ngg_workgroup_size(unsigned es_verts, unsigned gs_inst_prims,
unsigned max_vtx_out, unsigned prim_amp_factor);
#ifdef __cplusplus
}
#endif