ac/gpu_info: fix more non-coherent RB and GL2 combinations

The previous tcc_harvested check ignored non-harvested chips with a non-power-of-two memory bus.

Fixes: abed921ce7 - amd: add support for Navy Flounder

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9568>
This commit is contained in:
Marek Olšák 2021-03-12 10:26:54 -05:00 committed by Marge Bot
parent d4bcb58caf
commit 32eb74e1e1
4 changed files with 10 additions and 7 deletions

View File

@ -667,11 +667,12 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
info->tcc_cache_line_size = 128;
if (info->drm_minor >= 35) {
info->tcc_harvested = device_info.tcc_disabled_mask != 0;
info->num_tcc_blocks = info->max_tcc_blocks - util_bitcount64(device_info.tcc_disabled_mask);
} else {
/* This is a hack, but it's all we can do without a kernel upgrade. */
info->tcc_harvested = (info->vram_size / info->max_tcc_blocks) != 512 * 1024 * 1024;
info->num_tcc_blocks = info->vram_size / (512 * 1024 * 1024);
if (info->num_tcc_blocks > info->max_tcc_blocks)
info->num_tcc_blocks /= 2;
}
} else {
if (!info->has_graphics && info->family >= CHIP_ALDEBARAN)
@ -682,6 +683,8 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
info->num_tcc_blocks = info->max_tcc_blocks;
}
info->tcc_rb_non_coherent = !util_is_power_of_two_or_zero(info->num_tcc_blocks);
switch (info->family) {
case CHIP_TAHITI:
case CHIP_PITCAIRN:
@ -1070,7 +1073,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
fprintf(f, " max_tcc_blocks = %i\n", info->max_tcc_blocks);
fprintf(f, " num_tcc_blocks = %i\n", info->num_tcc_blocks);
fprintf(f, " tcc_cache_line_size = %u\n", info->tcc_cache_line_size);
fprintf(f, " tcc_harvested = %u\n", info->tcc_harvested);
fprintf(f, " tcc_rb_non_coherent = %u\n", info->tcc_rb_non_coherent);
fprintf(f, " pc_lines = %u\n", info->pc_lines);
fprintf(f, " lds_size_per_workgroup = %u\n", info->lds_size_per_workgroup);
fprintf(f, " lds_alloc_granularity = %i\n", info->lds_alloc_granularity);

View File

@ -108,7 +108,7 @@ struct radeon_info {
uint32_t max_tcc_blocks;
uint32_t num_tcc_blocks;
uint32_t tcc_cache_line_size;
bool tcc_harvested;
bool tcc_rb_non_coherent; /* whether L2 inv is needed for render->texture transitions */
unsigned pc_lines;
uint32_t lds_size_per_workgroup;
uint32_t lds_alloc_granularity;

View File

@ -3319,7 +3319,7 @@ static bool
radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
{
if (device->physical_device->rad_info.chip_class >= GFX10) {
return !device->physical_device->rad_info.tcc_harvested &&
return !device->physical_device->rad_info.tcc_rb_non_coherent &&
(image && !radv_image_is_pipe_misaligned(device, image));
} else if (device->physical_device->rad_info.chip_class == GFX9 && image) {
if (image->info.samples == 1 &&

View File

@ -1768,7 +1768,7 @@ static inline void si_make_CB_shader_coherent(struct si_context *sctx, unsigned
sctx->force_cb_shader_coherent = false;
if (sctx->chip_class >= GFX10) {
if (sctx->screen->info.tcc_harvested)
if (sctx->screen->info.tcc_rb_non_coherent)
sctx->flags |= SI_CONTEXT_INV_L2;
else if (shaders_read_metadata)
sctx->flags |= SI_CONTEXT_INV_L2_METADATA;
@ -1793,7 +1793,7 @@ static inline void si_make_DB_shader_coherent(struct si_context *sctx, unsigned
sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_INV_VCACHE;
if (sctx->chip_class >= GFX10) {
if (sctx->screen->info.tcc_harvested)
if (sctx->screen->info.tcc_rb_non_coherent)
sctx->flags |= SI_CONTEXT_INV_L2;
else if (shaders_read_metadata)
sctx->flags |= SI_CONTEXT_INV_L2_METADATA;