ac/gpu_info: rename info fields to num_cu, memory_bus_width, memory_freq_mhz
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17411>
This commit is contained in:
parent
38a2a2da3e
commit
f218c3d795
|
@ -814,7 +814,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
info->max_heap_size_kb = info->gart_size_kb;
|
||||
|
||||
info->vram_type = amdinfo->vram_type;
|
||||
info->vram_bit_width = amdinfo->vram_bit_width;
|
||||
info->memory_bus_width = amdinfo->vram_bit_width;
|
||||
|
||||
/* Set which chips have uncached device memory. */
|
||||
info->has_l2_uncached = info->gfx_level >= GFX9;
|
||||
|
@ -822,7 +822,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
/* Set hardware information. */
|
||||
/* convert the shader/memory clocks from KHz to MHz */
|
||||
info->max_shader_clock = amdinfo->max_engine_clk / 1000;
|
||||
info->max_memory_clock = amdinfo->max_memory_clk / 1000;
|
||||
info->memory_freq_mhz = amdinfo->max_memory_clk / 1000;
|
||||
info->max_tcc_blocks = device_info.num_tcc_blocks;
|
||||
info->max_se = amdinfo->num_shader_engines;
|
||||
info->max_sa_per_se = amdinfo->num_shader_arrays_per_engine;
|
||||
|
@ -1058,7 +1058,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
info->me_fw_feature >= 52);
|
||||
|
||||
/* Get the number of good compute units. */
|
||||
info->num_good_compute_units = 0;
|
||||
info->num_cu = 0;
|
||||
for (i = 0; i < info->max_se; i++) {
|
||||
for (j = 0; j < info->max_sa_per_se; j++) {
|
||||
if (info->gfx_level >= GFX11) {
|
||||
|
@ -1081,7 +1081,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
} else {
|
||||
info->cu_mask[i][j] = amdinfo->cu_bitmap[i][j];
|
||||
}
|
||||
info->num_good_compute_units += util_bitcount(info->cu_mask[i][j]);
|
||||
info->num_cu += util_bitcount(info->cu_mask[i][j]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1107,10 +1107,10 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
*/
|
||||
unsigned cu_group = info->gfx_level >= GFX10 ? 2 : 1;
|
||||
info->max_good_cu_per_sa =
|
||||
DIV_ROUND_UP(info->num_good_compute_units, (info->num_se * info->max_sa_per_se * cu_group)) *
|
||||
DIV_ROUND_UP(info->num_cu, (info->num_se * info->max_sa_per_se * cu_group)) *
|
||||
cu_group;
|
||||
info->min_good_cu_per_sa =
|
||||
(info->num_good_compute_units / (info->num_se * info->max_sa_per_se * cu_group)) * cu_group;
|
||||
(info->num_cu / (info->num_se * info->max_sa_per_se * cu_group)) * cu_group;
|
||||
|
||||
memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode, sizeof(amdinfo->gb_tile_mode));
|
||||
info->enabled_rb_mask = amdinfo->enabled_rb_pipes_mask;
|
||||
|
@ -1380,7 +1380,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
|
|||
fprintf(f, " vram_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_size, 1024 * 1024));
|
||||
fprintf(f, " vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_vis_size, 1024 * 1024));
|
||||
fprintf(f, " vram_type = %i\n", info->vram_type);
|
||||
fprintf(f, " vram_bit_width = %i\n", info->vram_bit_width);
|
||||
fprintf(f, " memory_bus_width = %i\n", info->memory_bus_width);
|
||||
fprintf(f, " max_heap_size_kb = %i MB\n", (int)DIV_ROUND_UP(info->max_heap_size_kb, 1024));
|
||||
fprintf(f, " min_alloc_size = %u\n", info->min_alloc_size);
|
||||
fprintf(f, " address32_hi = 0x%x\n", info->address32_hi);
|
||||
|
@ -1395,7 +1395,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
|
|||
fprintf(f, " lds_size_per_workgroup = %u\n", info->lds_size_per_workgroup);
|
||||
fprintf(f, " lds_alloc_granularity = %i\n", info->lds_alloc_granularity);
|
||||
fprintf(f, " lds_encode_granularity = %i\n", info->lds_encode_granularity);
|
||||
fprintf(f, " max_memory_clock = %i MHz\n", info->max_memory_clock);
|
||||
fprintf(f, " memory_freq = %i MHz\n", info->memory_freq_mhz);
|
||||
fprintf(f, " l1_cache_size = %i\n", info->l1_cache_size);
|
||||
fprintf(f, " l2_cache_size = %i\n", info->l2_cache_size);
|
||||
|
||||
|
@ -1462,7 +1462,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
|
|||
}
|
||||
fprintf(f, " spi_cu_en_has_effect = %i\n", info->spi_cu_en_has_effect);
|
||||
fprintf(f, " max_shader_clock = %i MHz\n", info->max_shader_clock);
|
||||
fprintf(f, " num_good_compute_units = %i\n", info->num_good_compute_units);
|
||||
fprintf(f, " num_cu = %i\n", info->num_cu);
|
||||
fprintf(f, " max_good_cu_per_sa = %i\n", info->max_good_cu_per_sa);
|
||||
fprintf(f, " min_good_cu_per_sa = %i\n", info->min_good_cu_per_sa);
|
||||
fprintf(f, " max_se = %i\n", info->max_se);
|
||||
|
@ -1762,7 +1762,7 @@ ac_get_compute_resource_limits(const struct radeon_info *info, unsigned waves_pe
|
|||
unsigned compute_resource_limits = S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
|
||||
|
||||
if (info->gfx_level >= GFX7) {
|
||||
unsigned num_cu_per_se = info->num_good_compute_units / info->num_se;
|
||||
unsigned num_cu_per_se = info->num_cu / info->num_se;
|
||||
|
||||
/* Gfx9 should set the limit to max instead of 0 to fix high priority compute. */
|
||||
if (info->gfx_level == GFX9 && !max_waves_per_sh) {
|
||||
|
|
|
@ -114,7 +114,7 @@ struct radeon_info {
|
|||
uint64_t gart_size;
|
||||
uint64_t vram_size;
|
||||
uint64_t vram_vis_size;
|
||||
uint32_t vram_bit_width;
|
||||
uint32_t memory_bus_width;
|
||||
uint32_t vram_type;
|
||||
uint32_t max_heap_size_kb;
|
||||
uint32_t min_alloc_size;
|
||||
|
@ -132,7 +132,7 @@ struct radeon_info {
|
|||
uint32_t lds_size_per_workgroup;
|
||||
uint32_t lds_alloc_granularity;
|
||||
uint32_t lds_encode_granularity;
|
||||
uint32_t max_memory_clock;
|
||||
uint32_t memory_freq_mhz;
|
||||
uint32_t l1_cache_size;
|
||||
uint32_t l2_cache_size;
|
||||
|
||||
|
@ -204,7 +204,7 @@ struct radeon_info {
|
|||
uint32_t cu_mask[AMD_MAX_SE][AMD_MAX_SA_PER_SE];
|
||||
uint32_t r600_max_quad_pipes; /* wave size / 16 */
|
||||
uint32_t max_shader_clock;
|
||||
uint32_t num_good_compute_units;
|
||||
uint32_t num_cu; /* only enabled CUs */
|
||||
uint32_t max_good_cu_per_sa;
|
||||
uint32_t min_good_cu_per_sa; /* min != max if SAs have different # of CUs */
|
||||
uint32_t max_se; /* number of shader engines incl. disabled ones */
|
||||
|
|
|
@ -452,7 +452,7 @@ static void ac_sqtt_fill_asic_info(struct radeon_info *rad_info,
|
|||
chunk->flags |= SQTT_FILE_CHUNK_ASIC_INFO_FLAG_PS1_EVENT_TOKENS_ENABLED;
|
||||
|
||||
chunk->trace_shader_core_clock = rad_info->max_shader_clock * 1000000;
|
||||
chunk->trace_memory_clock = rad_info->max_memory_clock * 1000000;
|
||||
chunk->trace_memory_clock = rad_info->memory_freq_mhz * 1000000;
|
||||
|
||||
/* RGP gets very confused if these clocks are 0. The numbers here are for profile_peak on
|
||||
* VGH since that is the chips where we've seen the need for this workaround. */
|
||||
|
@ -486,7 +486,7 @@ static void ac_sqtt_fill_asic_info(struct radeon_info *rad_info,
|
|||
chunk->ce_ram_size_graphics = 0;
|
||||
chunk->ce_ram_size_compute = 0;
|
||||
|
||||
chunk->vram_bus_width = rad_info->vram_bit_width;
|
||||
chunk->vram_bus_width = rad_info->memory_bus_width;
|
||||
chunk->vram_size = rad_info->vram_size;
|
||||
chunk->l2_cache_size = rad_info->l2_cache_size;
|
||||
chunk->l1_cache_size = rad_info->l1_cache_size;
|
||||
|
@ -507,7 +507,7 @@ static void ac_sqtt_fill_asic_info(struct radeon_info *rad_info,
|
|||
|
||||
chunk->gpu_timestamp_frequency = rad_info->clock_crystal_freq * 1000;
|
||||
chunk->max_shader_core_clock = rad_info->max_shader_clock * 1000000;
|
||||
chunk->max_memory_clock = rad_info->max_memory_clock * 1000000;
|
||||
chunk->max_memory_clock = rad_info->memory_freq_mhz * 1000000;
|
||||
chunk->memory_ops_per_clock = ac_memory_ops_per_clock(rad_info->vram_type);
|
||||
chunk->memory_chip_type = ac_vram_type_to_sqtt_memory_type(rad_info->vram_type);
|
||||
chunk->lds_granularity = rad_info->lds_encode_granularity;
|
||||
|
|
|
@ -2333,7 +2333,7 @@ radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
|
|||
(VkPhysicalDeviceShaderCoreProperties2AMD *)ext;
|
||||
|
||||
properties->shaderCoreFeatures = 0;
|
||||
properties->activeComputeUnitCount = pdevice->rad_info.num_good_compute_units;
|
||||
properties->activeComputeUnitCount = pdevice->rad_info.num_cu;
|
||||
break;
|
||||
}
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
|
||||
|
@ -3529,7 +3529,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
|
|||
*/
|
||||
uint32_t max_threads_per_block = 2048;
|
||||
device->scratch_waves =
|
||||
MAX2(32 * physical_device->rad_info.num_good_compute_units, max_threads_per_block / 64);
|
||||
MAX2(32 * physical_device->rad_info.num_cu, max_threads_per_block / 64);
|
||||
|
||||
device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
|
||||
|
||||
|
|
|
@ -210,8 +210,8 @@ enum {
|
|||
};
|
||||
|
||||
#define CTR_NUM_SIMD \
|
||||
CONSTANT(pdev->rad_info.num_simd_per_compute_unit * pdev->rad_info.num_good_compute_units)
|
||||
#define CTR_NUM_CUS CONSTANT(pdev->rad_info.num_good_compute_units)
|
||||
CONSTANT(pdev->rad_info.num_simd_per_compute_unit * pdev->rad_info.num_cu)
|
||||
#define CTR_NUM_CUS CONSTANT(pdev->rad_info.num_cu)
|
||||
|
||||
static void
|
||||
radv_query_perfcounter_descs(struct radv_physical_device *pdev, uint32_t *count,
|
||||
|
|
|
@ -286,7 +286,7 @@ radv_pipeline_init_scratch(const struct radv_device *device, struct radv_pipelin
|
|||
MAX2(scratch_bytes_per_wave, pipeline->shaders[i]->config.scratch_bytes_per_wave);
|
||||
|
||||
max_stage_waves =
|
||||
MIN2(max_stage_waves, 4 * device->physical_device->rad_info.num_good_compute_units *
|
||||
MIN2(max_stage_waves, 4 * device->physical_device->rad_info.num_cu *
|
||||
radv_get_max_waves(device, pipeline->shaders[i], i));
|
||||
max_waves = MAX2(max_waves, max_stage_waves);
|
||||
}
|
||||
|
|
|
@ -1029,7 +1029,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
|
|||
case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
|
||||
if (ret) {
|
||||
uint32_t *max_compute_units = ret;
|
||||
*max_compute_units = rscreen->info.num_good_compute_units;
|
||||
*max_compute_units = rscreen->info.num_cu;
|
||||
}
|
||||
return sizeof(uint32_t);
|
||||
|
||||
|
@ -1312,7 +1312,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
|
|||
|
||||
printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
|
||||
printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
|
||||
printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
|
||||
printf("num_cu = %i\n", rscreen->info.num_cu);
|
||||
printf("max_se = %i\n", rscreen->info.max_se);
|
||||
printf("max_sh_per_se = %i\n", rscreen->info.max_sa_per_se);
|
||||
|
||||
|
|
|
@ -427,7 +427,7 @@ static bool r600_query_sw_get_result(struct r600_common_context *rctx,
|
|||
result->u32 = 0;
|
||||
return true;
|
||||
case R600_QUERY_GPIN_NUM_SIMD:
|
||||
result->u32 = rctx->screen->info.num_good_compute_units;
|
||||
result->u32 = rctx->screen->info.num_cu;
|
||||
return true;
|
||||
case R600_QUERY_GPIN_NUM_RB:
|
||||
result->u32 = rctx->screen->info.max_render_backends;
|
||||
|
|
|
@ -934,7 +934,7 @@ static int si_get_compute_param(struct pipe_screen *screen, enum pipe_shader_ir
|
|||
case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
|
||||
if (ret) {
|
||||
uint32_t *max_compute_units = ret;
|
||||
*max_compute_units = sscreen->info.num_good_compute_units;
|
||||
*max_compute_units = sscreen->info.num_cu;
|
||||
}
|
||||
return sizeof(uint32_t);
|
||||
|
||||
|
|
|
@ -465,7 +465,7 @@ static bool si_query_sw_get_result(struct si_context *sctx, struct si_query *squ
|
|||
result->u32 = 0;
|
||||
return true;
|
||||
case SI_QUERY_GPIN_NUM_SIMD:
|
||||
result->u32 = sctx->screen->info.num_good_compute_units;
|
||||
result->u32 = sctx->screen->info.num_cu;
|
||||
return true;
|
||||
case SI_QUERY_GPIN_NUM_RB:
|
||||
result->u32 = sctx->screen->info.max_render_backends;
|
||||
|
|
|
@ -488,9 +488,9 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
|
|||
&ws->info.r600_max_quad_pipes);
|
||||
|
||||
/* All GPUs have at least one compute unit */
|
||||
ws->info.num_good_compute_units = 1;
|
||||
ws->info.num_cu = 1;
|
||||
radeon_get_drm_value(ws->fd, RADEON_INFO_ACTIVE_CU_COUNT, NULL,
|
||||
&ws->info.num_good_compute_units);
|
||||
&ws->info.num_cu);
|
||||
|
||||
radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SE, NULL,
|
||||
&ws->info.max_se);
|
||||
|
@ -546,7 +546,7 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
|
|||
&ws->info.max_sa_per_se);
|
||||
if (ws->gen == DRV_SI) {
|
||||
ws->info.max_good_cu_per_sa =
|
||||
ws->info.min_good_cu_per_sa = ws->info.num_good_compute_units /
|
||||
ws->info.min_good_cu_per_sa = ws->info.num_cu /
|
||||
(ws->info.max_se * ws->info.max_sa_per_se);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue