ac/gpu_info: fix more non-coherent RB and GL2 combinations

The old tcc_harvested flag ignored non-harvested chips with a non-power-of-two memory bus.

Fixes: abed921ce7 - amd: add support for Navy Flounder
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9568>
2021-03-12 10:26:54 -05:00 · 2021-03-12 10:26:54 -05:00 · 32eb74e1e1
parent d4bcb58caf
commit 32eb74e1e1
4 changed files with 10 additions and 7 deletions
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@ -667,11 +667,12 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
      info->tcc_cache_line_size = 128;

      if (info->drm_minor >= 35) {
-         info->tcc_harvested = device_info.tcc_disabled_mask != 0;
         info->num_tcc_blocks = info->max_tcc_blocks - util_bitcount64(device_info.tcc_disabled_mask);
      } else {
         /* This is a hack, but it's all we can do without a kernel upgrade. */
-         info->tcc_harvested = (info->vram_size / info->max_tcc_blocks) != 512 * 1024 * 1024;
+         info->num_tcc_blocks = info->vram_size / (512 * 1024 * 1024);
+         if (info->num_tcc_blocks > info->max_tcc_blocks)
+            info->num_tcc_blocks /= 2;
      }
   } else {
      if (!info->has_graphics && info->family >= CHIP_ALDEBARAN)
@ -682,6 +683,8 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
      info->num_tcc_blocks = info->max_tcc_blocks;
   }

+   info->tcc_rb_non_coherent = !util_is_power_of_two_or_zero(info->num_tcc_blocks);
+
   switch (info->family) {
   case CHIP_TAHITI:
   case CHIP_PITCAIRN:
@ -1070,7 +1073,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
   fprintf(f, "    max_tcc_blocks = %i\n", info->max_tcc_blocks);
   fprintf(f, "    num_tcc_blocks = %i\n", info->num_tcc_blocks);
   fprintf(f, "    tcc_cache_line_size = %u\n", info->tcc_cache_line_size);
-   fprintf(f, "    tcc_harvested = %u\n", info->tcc_harvested);
+   fprintf(f, "    tcc_rb_non_coherent = %u\n", info->tcc_rb_non_coherent);
   fprintf(f, "    pc_lines = %u\n", info->pc_lines);
   fprintf(f, "    lds_size_per_workgroup = %u\n", info->lds_size_per_workgroup);
   fprintf(f, "    lds_alloc_granularity = %i\n", info->lds_alloc_granularity);
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@ -108,7 +108,7 @@ struct radeon_info {
   uint32_t max_tcc_blocks;
   uint32_t num_tcc_blocks;
   uint32_t tcc_cache_line_size;
-   bool tcc_harvested;
+   bool tcc_rb_non_coherent; /* whether L2 inv is needed for render->texture transitions */
   unsigned pc_lines;
   uint32_t lds_size_per_workgroup;
   uint32_t lds_alloc_granularity;
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@ -3319,7 +3319,7 @@ static bool
 radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
 {
 	if (device->physical_device->rad_info.chip_class >= GFX10) {
-		return !device->physical_device->rad_info.tcc_harvested &&
+		return !device->physical_device->rad_info.tcc_rb_non_coherent &&
 			(image && !radv_image_is_pipe_misaligned(device, image));
 	} else if (device->physical_device->rad_info.chip_class == GFX9 && image) {
 		if (image->info.samples == 1 &&
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@ -1768,7 +1768,7 @@ static inline void si_make_CB_shader_coherent(struct si_context *sctx, unsigned
   sctx->force_cb_shader_coherent = false;

   if (sctx->chip_class >= GFX10) {
-      if (sctx->screen->info.tcc_harvested)
+      if (sctx->screen->info.tcc_rb_non_coherent)
         sctx->flags |= SI_CONTEXT_INV_L2;
      else if (shaders_read_metadata)
         sctx->flags |= SI_CONTEXT_INV_L2_METADATA;
@ -1793,7 +1793,7 @@ static inline void si_make_DB_shader_coherent(struct si_context *sctx, unsigned
   sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_INV_VCACHE;

   if (sctx->chip_class >= GFX10) {
-      if (sctx->screen->info.tcc_harvested)
+      if (sctx->screen->info.tcc_rb_non_coherent)
         sctx->flags |= SI_CONTEXT_INV_L2;
      else if (shaders_read_metadata)
         sctx->flags |= SI_CONTEXT_INV_L2_METADATA;