ac: import ac_get_compute_resource_limits() from RadeonSI

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
2019-07-12 12:17:11 +02:00 · 2019-07-12 12:17:11 +02:00 · e510c5ee3b
parent 5f4f8aec74
commit e510c5ee3b
5 changed files with 42 additions and 39 deletions
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -895,3 +895,35 @@ ac_get_harvested_configs(struct radeon_info *info,
 		}
 	}
 }
+
+unsigned ac_get_compute_resource_limits(struct radeon_info *info,
+					unsigned waves_per_threadgroup,
+					unsigned max_waves_per_sh,
+					unsigned threadgroups_per_cu)
+{
+	unsigned compute_resource_limits =
+		S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
+
+	if (info->chip_class >= GFX7) {
+		unsigned num_cu_per_se = info->num_good_compute_units /
+					 info->max_se;
+
+		/* Force even distribution on all SIMDs in CU if the workgroup
+		 * size is 64. This has shown some good improvements if # of CUs
+		 * per SE is not a multiple of 4.
+		 */
+		if (num_cu_per_se % 4 && waves_per_threadgroup == 1)
+			compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1);
+
+		assert(threadgroups_per_cu >= 1 && threadgroups_per_cu <= 8);
+		compute_resource_limits |= S_00B854_WAVES_PER_SH(max_waves_per_sh) |
+					   S_00B854_CU_GROUP_COUNT(threadgroups_per_cu - 1);
+	} else {
+		/* GFX6 */
+		if (max_waves_per_sh) {
+			unsigned limit_div16 = DIV_ROUND_UP(max_waves_per_sh, 16);
+			compute_resource_limits |= S_00B854_WAVES_PER_SH_SI(limit_div16);
+		}
+	}
+	return compute_resource_limits;
+}
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -167,6 +167,10 @@ void ac_get_harvested_configs(struct radeon_info *info,
 			      unsigned raster_config,
 			      unsigned *cik_raster_config_1_p,
 			      unsigned *raster_config_se);
+unsigned ac_get_compute_resource_limits(struct radeon_info *info,
+					unsigned waves_per_threadgroup,
+					unsigned max_waves_per_sh,
+					unsigned threadgroups_per_cu);

 static inline unsigned ac_get_max_simd_waves(enum radeon_family family)
 {
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -772,38 +772,6 @@ static void si_setup_tgsi_user_data(struct si_context *sctx,
 	}
 }

-unsigned si_get_compute_resource_limits(struct si_screen *sscreen,
-					unsigned waves_per_threadgroup,
-					unsigned max_waves_per_sh,
-					unsigned threadgroups_per_cu)
-{
-	unsigned compute_resource_limits =
-		S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
-
-	if (sscreen->info.chip_class >= GFX7) {
-		unsigned num_cu_per_se = sscreen->info.num_good_compute_units /
-					 sscreen->info.max_se;
-
-		/* Force even distribution on all SIMDs in CU if the workgroup
-		 * size is 64. This has shown some good improvements if # of CUs
-		 * per SE is not a multiple of 4.
-		 */
-		if (num_cu_per_se % 4 && waves_per_threadgroup == 1)
-			compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1);
-
-		assert(threadgroups_per_cu >= 1 && threadgroups_per_cu <= 8);
-		compute_resource_limits |= S_00B854_WAVES_PER_SH(max_waves_per_sh) |
-					   S_00B854_CU_GROUP_COUNT(threadgroups_per_cu - 1);
-	} else {
-		/* GFX6 */
-		if (max_waves_per_sh) {
-			unsigned limit_div16 = DIV_ROUND_UP(max_waves_per_sh, 16);
-			compute_resource_limits |= S_00B854_WAVES_PER_SH_SI(limit_div16);
-		}
-	}
-	return compute_resource_limits;
-}
-
 static void si_emit_dispatch_packets(struct si_context *sctx,
                                     const struct pipe_grid_info *info)
 {
@@ -820,7 +788,8 @@ static void si_emit_dispatch_packets(struct si_context *sctx,
 		threadgroups_per_cu = 2;

 	radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
-			  si_get_compute_resource_limits(sscreen, waves_per_threadgroup,
+			  ac_get_compute_resource_limits(&sscreen->info,
+							 waves_per_threadgroup,
 							 sctx->cs_max_waves_per_sh,
 							 threadgroups_per_cu));

--- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
+++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
@@ -1426,8 +1426,10 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
 				S_00B84C_LDS_SIZE(shader->config.lds_size));

 		radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
-			si_get_compute_resource_limits(sctx->screen, WAVES_PER_TG,
-						       MAX_WAVES_PER_SH, THREADGROUPS_PER_CU));
+			ac_get_compute_resource_limits(&sctx->screen->info,
+						       WAVES_PER_TG,
+						       MAX_WAVES_PER_SH,
+						       THREADGROUPS_PER_CU));
 		sctx->compute_ib_last_shader = shader;
 	}

--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1396,10 +1396,6 @@ unsigned si_end_counter(struct si_screen *sscreen, unsigned type,

 /* si_compute.c */
 void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf *cs);
-unsigned si_get_compute_resource_limits(struct si_screen *sscreen,
-					unsigned waves_per_threadgroup,
-					unsigned max_waves_per_sh,
-					unsigned threadgroups_per_cu);
 void si_init_compute_functions(struct si_context *sctx);

 /* si_compute_prim_discard.c */