ac: split lds_granularity into encode and allocation granularities
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8761>
This commit is contained in:
parent
df61444ac4
commit
aa53335135
|
@ -722,7 +722,11 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
||||||
* LDS is 128KB in WGP mode and 64KB in CU mode. Assume the WGP mode is used.
|
* LDS is 128KB in WGP mode and 64KB in CU mode. Assume the WGP mode is used.
|
||||||
*/
|
*/
|
||||||
info->lds_size_per_workgroup = info->chip_class >= GFX10 ? 128 * 1024 : 64 * 1024;
|
info->lds_size_per_workgroup = info->chip_class >= GFX10 ? 128 * 1024 : 64 * 1024;
|
||||||
info->lds_granularity = info->chip_class >= GFX7 ? 128 * 4 : 64 * 4;
|
/* lds_encode_granularity is the block size used for encoding registers.
|
||||||
|
* lds_alloc_granularity is what the hardware will align the LDS size to.
|
||||||
|
*/
|
||||||
|
info->lds_encode_granularity = info->chip_class >= GFX7 ? 128 * 4 : 64 * 4;
|
||||||
|
info->lds_alloc_granularity = info->chip_class >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
|
||||||
|
|
||||||
assert(util_is_power_of_two_or_zero(dma.available_rings + 1));
|
assert(util_is_power_of_two_or_zero(dma.available_rings + 1));
|
||||||
assert(util_is_power_of_two_or_zero(compute.available_rings + 1));
|
assert(util_is_power_of_two_or_zero(compute.available_rings + 1));
|
||||||
|
@ -1058,7 +1062,8 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
|
||||||
fprintf(f, " tcc_harvested = %u\n", info->tcc_harvested);
|
fprintf(f, " tcc_harvested = %u\n", info->tcc_harvested);
|
||||||
fprintf(f, " pc_lines = %u\n", info->pc_lines);
|
fprintf(f, " pc_lines = %u\n", info->pc_lines);
|
||||||
fprintf(f, " lds_size_per_workgroup = %u\n", info->lds_size_per_workgroup);
|
fprintf(f, " lds_size_per_workgroup = %u\n", info->lds_size_per_workgroup);
|
||||||
fprintf(f, " lds_granularity = %i\n", info->lds_granularity);
|
fprintf(f, " lds_alloc_granularity = %i\n", info->lds_alloc_granularity);
|
||||||
|
fprintf(f, " lds_encode_granularity = %i\n", info->lds_encode_granularity);
|
||||||
fprintf(f, " max_memory_clock = %i\n", info->max_memory_clock);
|
fprintf(f, " max_memory_clock = %i\n", info->max_memory_clock);
|
||||||
fprintf(f, " ce_ram_size = %i\n", info->ce_ram_size);
|
fprintf(f, " ce_ram_size = %i\n", info->ce_ram_size);
|
||||||
fprintf(f, " l1_cache_size = %i\n", info->l1_cache_size);
|
fprintf(f, " l1_cache_size = %i\n", info->l1_cache_size);
|
||||||
|
|
|
@ -110,7 +110,8 @@ struct radeon_info {
|
||||||
bool tcc_harvested;
|
bool tcc_harvested;
|
||||||
unsigned pc_lines;
|
unsigned pc_lines;
|
||||||
uint32_t lds_size_per_workgroup;
|
uint32_t lds_size_per_workgroup;
|
||||||
uint32_t lds_granularity;
|
uint32_t lds_alloc_granularity;
|
||||||
|
uint32_t lds_encode_granularity;
|
||||||
uint32_t max_memory_clock;
|
uint32_t max_memory_clock;
|
||||||
uint32_t ce_ram_size;
|
uint32_t ce_ram_size;
|
||||||
uint32_t l1_cache_size;
|
uint32_t l1_cache_size;
|
||||||
|
|
|
@ -405,7 +405,7 @@ static void ac_fill_sqtt_asic_info(struct radeon_info *rad_info,
|
||||||
chunk->max_memory_clock = rad_info->max_memory_clock * 1000000;
|
chunk->max_memory_clock = rad_info->max_memory_clock * 1000000;
|
||||||
chunk->memory_ops_per_clock = 0;
|
chunk->memory_ops_per_clock = 0;
|
||||||
chunk->memory_chip_type = ac_vram_type_to_sqtt_memory_type(rad_info->vram_type);
|
chunk->memory_chip_type = ac_vram_type_to_sqtt_memory_type(rad_info->vram_type);
|
||||||
chunk->lds_granularity = rad_info->lds_granularity;
|
chunk->lds_granularity = rad_info->lds_encode_granularity;
|
||||||
|
|
||||||
for (unsigned se = 0; se < 4; se++) {
|
for (unsigned se = 0; se < 4; se++) {
|
||||||
for (unsigned sa = 0; sa < 2; sa++) {
|
for (unsigned sa = 0; sa < 2; sa++) {
|
||||||
|
|
Loading…
Reference in New Issue