ac/gpu_info: rework and extend device info to be more informative

This is the result with AMD_DEBUG=info:

Device info:
    name = NAVI23
    marketing_name = AMD Radeon RX 6600
    num_se = 2
    num_rb = 8
    num_cu = 28
    max_gpu_freq = 2750 MHz
    max_gflops = 9856 GFLOPS
    l0_cache_size = 16 KB
    l1_cache_size = 128 KB
    l2_cache_size = 2048 KB
    l3_cache_size = 32 MB
    memory_channels = 8 (TCC blocks)
    memory_size = 8 GB (8192 MB)
    memory_freq = 14 GHz
    memory_bus_width = 128 bits
    memory_bandwidth = 224 GB/s
    clock_crystal_freq = 100000 KHz
    IP GFX     10.3 	queues:1
    IP COMP    10.3 	queues:4
    IP SDMA     5.2 	queues:2
    IP VCN_DEC  3.0 	queues:1
    IP VCN_ENC  3.0 	queues:1
    IP VCN_JPG  3.0 	queues:1

It might not be 100% correct with other chips.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17411>
This commit is contained in:
Marek Olšák 2022-07-03 15:27:59 -04:00 committed by Marge Bot
parent dd6b001775
commit 9552da66cc
6 changed files with 112 additions and 52 deletions

View File

@ -74,6 +74,19 @@
#define AMDGPU_INFO_VIDEO_CAPS_DECODE 0
#define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1
#define AMDGPU_INFO_FW_GFX_MEC 0x08
#define AMDGPU_VRAM_TYPE_UNKNOWN 0
#define AMDGPU_VRAM_TYPE_GDDR1 1
#define AMDGPU_VRAM_TYPE_DDR2 2
#define AMDGPU_VRAM_TYPE_GDDR3 3
#define AMDGPU_VRAM_TYPE_GDDR4 4
#define AMDGPU_VRAM_TYPE_GDDR5 5
#define AMDGPU_VRAM_TYPE_HBM 6
#define AMDGPU_VRAM_TYPE_DDR3 7
#define AMDGPU_VRAM_TYPE_DDR4 8
#define AMDGPU_VRAM_TYPE_GDDR6 9
#define AMDGPU_VRAM_TYPE_DDR5 10
struct drm_amdgpu_heap_info {
uint64_t total_heap_size;
};
@ -821,8 +834,8 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
/* Set hardware information. */
/* convert the shader/memory clocks from KHz to MHz */
info->max_shader_clock = amdinfo->max_engine_clk / 1000;
info->memory_freq_mhz = amdinfo->max_memory_clk / 1000;
info->max_gpu_freq_mhz = amdinfo->max_engine_clk / 1000;
info->memory_freq_mhz_effective = info->memory_freq_mhz = amdinfo->max_memory_clk / 1000;
info->max_tcc_blocks = device_info.num_tcc_blocks;
info->max_se = amdinfo->num_shader_engines;
info->max_sa_per_se = amdinfo->num_shader_arrays_per_engine;
@ -830,6 +843,23 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
info->vce_fw_version = info->ip[AMD_IP_VCE].num_queues ? vce_version : 0;
info->has_video_hw.uvd_decode = info->ip[AMD_IP_UVD].num_queues != 0;
/* Based on MemoryOpsPerClockTable from PAL. */
switch (info->vram_type) {
case AMDGPU_VRAM_TYPE_DDR2:
case AMDGPU_VRAM_TYPE_DDR3:
case AMDGPU_VRAM_TYPE_DDR4: /* same for LPDDR4 */
case AMDGPU_VRAM_TYPE_HBM: /* same for HBM2 and HBM3 */
info->memory_freq_mhz_effective *= 2;
break;
case AMDGPU_VRAM_TYPE_DDR5: /* same for LPDDR5 */
case AMDGPU_VRAM_TYPE_GDDR5:
info->memory_freq_mhz_effective *= 4;
break;
case AMDGPU_VRAM_TYPE_GDDR6:
info->memory_freq_mhz_effective *= 16;
break;
}
/* unified ring */
info->has_video_hw.vcn_decode
= info->family >= CHIP_GFX1100
@ -1251,6 +1281,15 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
const unsigned max_waves_per_tg = 32; /* 1024 threads in Wave32 */
info->max_scratch_waves = MAX2(32 * info->min_good_cu_per_sa * info->max_sa_per_se * info->num_se,
max_waves_per_tg);
info->num_rb = util_bitcount(info->enabled_rb_mask);
info->max_gflops = info->num_cu * 128 * info->max_gpu_freq_mhz / 1000;
info->memory_bandwidth_gbps = DIV_ROUND_UP(info->memory_freq_mhz_effective * info->memory_bus_width / 8, 1000);
if (info->gfx_level >= GFX10_3 && info->has_dedicated_vram) {
info->l3_cache_size_mb = info->num_tcc_blocks *
(info->family == CHIP_NAVI21 ||
info->family == CHIP_NAVI22 ? 8 : 4);
}
set_custom_cu_en_mask(info);
@ -1309,24 +1348,36 @@ void ac_compute_device_uuid(struct radeon_info *info, char *uuid, size_t size)
void ac_print_gpu_info(struct radeon_info *info, FILE *f)
{
fprintf(f, "Device info:\n");
fprintf(f, " pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n", info->pci_domain, info->pci_bus,
info->pci_dev, info->pci_func);
fprintf(f, " name = %s\n", info->name);
fprintf(f, " lowercase_name = %s\n", info->lowercase_name);
fprintf(f, " marketing_name = %s\n", info->marketing_name);
fprintf(f, " is_pro_graphics = %u\n", info->is_pro_graphics);
fprintf(f, " pci_id = 0x%x\n", info->pci_id);
fprintf(f, " pci_rev_id = 0x%x\n", info->pci_rev_id);
fprintf(f, " family = %i\n", info->family);
fprintf(f, " gfx_level = %i\n", info->gfx_level);
fprintf(f, " family_id = %i\n", info->family_id);
fprintf(f, " chip_external_rev = %i\n", info->chip_external_rev);
fprintf(f, " num_se = %i\n", info->num_se);
fprintf(f, " num_rb = %i\n", info->num_rb);
fprintf(f, " num_cu = %i\n", info->num_cu);
fprintf(f, " max_gpu_freq = %i MHz\n", info->max_gpu_freq_mhz);
fprintf(f, " max_gflops = %u GFLOPS\n", info->max_gflops);
if (info->gfx_level >= GFX10) {
fprintf(f, " l0_cache_size = %i KB\n", DIV_ROUND_UP(info->l1_cache_size, 1024));
fprintf(f, " l1_cache_size = %i KB\n", 128);
} else {
fprintf(f, " l1_cache_size = %i KB\n", DIV_ROUND_UP(info->l1_cache_size, 1024));
}
fprintf(f, " l2_cache_size = %i KB\n", DIV_ROUND_UP(info->l2_cache_size, 1024));
if (info->l3_cache_size_mb)
fprintf(f, " l3_cache_size = %i MB\n", info->l3_cache_size_mb);
fprintf(f, " memory_channels = %u (TCC blocks)\n", info->num_tcc_blocks);
fprintf(f, " memory_size = %u GB (%u MB)\n",
DIV_ROUND_UP(info->vram_size_kb, (1024 * 1024)),
DIV_ROUND_UP(info->vram_size_kb, 1024));
fprintf(f, " memory_freq = %u GHz\n", DIV_ROUND_UP(info->memory_freq_mhz_effective, 1000));
fprintf(f, " memory_bus_width = %u bits\n", info->memory_bus_width);
fprintf(f, " memory_bandwidth = %u GB/s\n", info->memory_bandwidth_gbps);
fprintf(f, " clock_crystal_freq = %i KHz\n", info->clock_crystal_freq);
fprintf(f, "Features:\n");
static const char *ip_string[] = {
const char *ip_string[] = {
[AMD_IP_GFX] = "GFX",
[AMD_IP_COMPUTE] = "COMP",
[AMD_IP_SDMA] = "SDMA",
@ -1334,20 +1385,29 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
[AMD_IP_VCE] = "VCE",
[AMD_IP_UVD_ENC] = "UVD_ENC",
[AMD_IP_VCN_DEC] = "VCN_DEC",
[AMD_IP_VCN_ENC] = "VCN_ENC",
[AMD_IP_VCN_ENC] = info->family >= CHIP_GFX1100 ? "VCN" : "VCN_ENC",
[AMD_IP_VCN_JPEG] = "VCN_JPG",
};
for (unsigned i = 0; i < AMD_NUM_IP_TYPES; i++) {
if (info->ip[i].num_queues) {
fprintf(f, " IP %-4s %2u.%u \tqueues:%u\n", ip_string[i],
fprintf(f, " IP %-7s %2u.%u \tqueues:%u\n", ip_string[i],
info->ip[i].ver_major, info->ip[i].ver_minor, info->ip[i].num_queues);
}
}
if (info->family >= CHIP_GFX1100)
ip_string[AMD_IP_VCN_UNIFIED] = "VCN_UNIFIED";
fprintf(f, "Identification:\n");
fprintf(f, " pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n", info->pci_domain, info->pci_bus,
info->pci_dev, info->pci_func);
fprintf(f, " pci_id = 0x%x\n", info->pci_id);
fprintf(f, " pci_rev_id = 0x%x\n", info->pci_rev_id);
fprintf(f, " family = %i\n", info->family);
fprintf(f, " gfx_level = %i\n", info->gfx_level);
fprintf(f, " family_id = %i\n", info->family_id);
fprintf(f, " chip_external_rev = %i\n", info->chip_external_rev);
fprintf(f, "Flags:\n");
fprintf(f, " is_pro_graphics = %u\n", info->is_pro_graphics);
fprintf(f, " has_graphics = %i\n", info->has_graphics);
fprintf(f, " has_clear_state = %u\n", info->has_clear_state);
fprintf(f, " has_distributed_tess = %u\n", info->has_distributed_tess);
@ -1380,7 +1440,6 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
fprintf(f, " vram_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_size, 1024 * 1024));
fprintf(f, " vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_vis_size, 1024 * 1024));
fprintf(f, " vram_type = %i\n", info->vram_type);
fprintf(f, " memory_bus_width = %i\n", info->memory_bus_width);
fprintf(f, " max_heap_size_kb = %i MB\n", (int)DIV_ROUND_UP(info->max_heap_size_kb, 1024));
fprintf(f, " min_alloc_size = %u\n", info->min_alloc_size);
fprintf(f, " address32_hi = 0x%x\n", info->address32_hi);
@ -1388,16 +1447,13 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
fprintf(f, " all_vram_visible = %u\n", info->all_vram_visible);
fprintf(f, " smart_access_memory = %u\n", info->smart_access_memory);
fprintf(f, " max_tcc_blocks = %i\n", info->max_tcc_blocks);
fprintf(f, " num_tcc_blocks = %i\n", info->num_tcc_blocks);
fprintf(f, " tcc_cache_line_size = %u\n", info->tcc_cache_line_size);
fprintf(f, " tcc_rb_non_coherent = %u\n", info->tcc_rb_non_coherent);
fprintf(f, " pc_lines = %u\n", info->pc_lines);
fprintf(f, " lds_size_per_workgroup = %u\n", info->lds_size_per_workgroup);
fprintf(f, " lds_alloc_granularity = %i\n", info->lds_alloc_granularity);
fprintf(f, " lds_encode_granularity = %i\n", info->lds_encode_granularity);
fprintf(f, " memory_freq = %i MHz\n", info->memory_freq_mhz);
fprintf(f, " l1_cache_size = %i\n", info->l1_cache_size);
fprintf(f, " l2_cache_size = %i\n", info->l2_cache_size);
fprintf(f, " max_memory_clock = %i MHz\n", info->memory_freq_mhz);
fprintf(f, "CP info:\n");
fprintf(f, " gfx_ib_pad_with_type2 = %i\n", info->gfx_ib_pad_with_type2);
@ -1461,12 +1517,9 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
}
}
fprintf(f, " spi_cu_en_has_effect = %i\n", info->spi_cu_en_has_effect);
fprintf(f, " max_shader_clock = %i MHz\n", info->max_shader_clock);
fprintf(f, " num_cu = %i\n", info->num_cu);
fprintf(f, " max_good_cu_per_sa = %i\n", info->max_good_cu_per_sa);
fprintf(f, " min_good_cu_per_sa = %i\n", info->min_good_cu_per_sa);
fprintf(f, " max_se = %i\n", info->max_se);
fprintf(f, " num_se = %i\n", info->num_se);
fprintf(f, " max_sa_per_se = %i\n", info->max_sa_per_se);
fprintf(f, " max_wave64_per_simd = %i\n", info->max_wave64_per_simd);
fprintf(f, " num_physical_sgprs_per_simd = %i\n", info->num_physical_sgprs_per_simd);

View File

@ -49,27 +49,42 @@ struct amd_ip_info {
};
struct radeon_info {
/* Device info. */
const char *name;
char lowercase_name[32];
const char *marketing_name;
uint32_t num_se; /* only enabled SEs */
uint32_t num_rb; /* only enabled RBs */
uint32_t num_cu; /* only enabled CUs */
uint32_t max_gpu_freq_mhz; /* also known as the shader clock */
uint32_t max_gflops;
uint32_t l1_cache_size;
uint32_t l2_cache_size;
uint32_t l3_cache_size_mb;
uint32_t num_tcc_blocks; /* also the number of memory channels */
uint32_t memory_freq_mhz;
uint32_t memory_freq_mhz_effective;
uint32_t memory_bus_width;
uint32_t memory_bandwidth_gbps;
uint32_t clock_crystal_freq;
struct amd_ip_info ip[AMD_NUM_IP_TYPES];
/* Identification. */
/* PCI info: domain:bus:dev:func */
uint32_t pci_domain;
uint32_t pci_bus;
uint32_t pci_dev;
uint32_t pci_func;
/* Device info. */
const char *name;
char lowercase_name[32];
const char *marketing_name;
bool is_pro_graphics;
uint32_t pci_id;
uint32_t pci_rev_id;
enum radeon_family family;
enum amd_gfx_level gfx_level;
uint32_t family_id;
uint32_t chip_external_rev;
uint32_t clock_crystal_freq;
/* Features. */
struct amd_ip_info ip[AMD_NUM_IP_TYPES];
/* Flags. */
bool is_pro_graphics;
bool has_graphics; /* false if the chip is compute-only */
uint32_t ib_pad_dw_mask[AMD_NUM_IP_TYPES];
bool has_clear_state;
@ -114,7 +129,6 @@ struct radeon_info {
uint64_t gart_size;
uint64_t vram_size;
uint64_t vram_vis_size;
uint32_t memory_bus_width;
uint32_t vram_type;
uint32_t max_heap_size_kb;
uint32_t min_alloc_size;
@ -125,16 +139,12 @@ struct radeon_info {
bool has_l2_uncached;
bool r600_has_virtual_memory;
uint32_t max_tcc_blocks;
uint32_t num_tcc_blocks;
uint32_t tcc_cache_line_size;
bool tcc_rb_non_coherent; /* whether L2 inv is needed for render->texture transitions */
unsigned pc_lines;
uint32_t lds_size_per_workgroup;
uint32_t lds_alloc_granularity;
uint32_t lds_encode_granularity;
uint32_t memory_freq_mhz;
uint32_t l1_cache_size;
uint32_t l2_cache_size;
/* CP info. */
bool gfx_ib_pad_with_type2;
@ -203,12 +213,9 @@ struct radeon_info {
/* Shader cores. */
uint32_t cu_mask[AMD_MAX_SE][AMD_MAX_SA_PER_SE];
uint32_t r600_max_quad_pipes; /* wave size / 16 */
uint32_t max_shader_clock;
uint32_t num_cu; /* only enabled CUs */
uint32_t max_good_cu_per_sa;
uint32_t min_good_cu_per_sa; /* min != max if SAs have different # of CUs */
uint32_t max_se; /* number of shader engines incl. disabled ones */
uint32_t num_se; /* number of enabled shader engines */
uint32_t max_sa_per_se; /* shader arrays per shader engine */
uint32_t max_wave64_per_simd;
uint32_t num_physical_sgprs_per_simd;

View File

@ -451,8 +451,8 @@ static void ac_sqtt_fill_asic_info(struct radeon_info *rad_info,
if (rad_info->gfx_level >= GFX9)
chunk->flags |= SQTT_FILE_CHUNK_ASIC_INFO_FLAG_PS1_EVENT_TOKENS_ENABLED;
chunk->trace_shader_core_clock = rad_info->max_shader_clock * 1000000;
chunk->trace_memory_clock = rad_info->memory_freq_mhz * 1000000;
chunk->trace_shader_core_clock = rad_info->max_gpu_freq_mhz * 1000000ull;
chunk->trace_memory_clock = rad_info->memory_freq_mhz * 1000000ull;
/* RGP gets very confused if these clocks are 0. The numbers here are for profile_peak on
* VGH since that is the chips where we've seen the need for this workaround. */
@ -506,7 +506,7 @@ static void ac_sqtt_fill_asic_info(struct radeon_info *rad_info,
chunk->pixels_per_clock = 0.0;
chunk->gpu_timestamp_frequency = rad_info->clock_crystal_freq * 1000;
chunk->max_shader_core_clock = rad_info->max_shader_clock * 1000000;
chunk->max_shader_core_clock = rad_info->max_gpu_freq_mhz * 1000000;
chunk->max_memory_clock = rad_info->memory_freq_mhz * 1000000;
chunk->memory_ops_per_clock = ac_memory_ops_per_clock(rad_info->vram_type);
chunk->memory_chip_type = ac_vram_type_to_sqtt_memory_type(rad_info->vram_type);

View File

@ -1022,7 +1022,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
if (ret) {
uint32_t *max_clock_frequency = ret;
*max_clock_frequency = rscreen->info.max_shader_clock;
*max_clock_frequency = rscreen->info.max_gpu_freq_mhz;
}
return sizeof(uint32_t);
@ -1311,7 +1311,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
printf("has_syncobj = %u\n", rscreen->info.has_syncobj);
printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
printf("max_gpu_freq_mhz = %i\n", rscreen->info.max_gpu_freq_mhz);
printf("num_cu = %i\n", rscreen->info.num_cu);
printf("max_se = %i\n", rscreen->info.max_se);
printf("max_sh_per_se = %i\n", rscreen->info.max_sa_per_se);

View File

@ -927,7 +927,7 @@ static int si_get_compute_param(struct pipe_screen *screen, enum pipe_shader_ir
case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
if (ret) {
uint32_t *max_clock_frequency = ret;
*max_clock_frequency = sscreen->info.max_shader_clock;
*max_clock_frequency = sscreen->info.max_gpu_freq_mhz;
}
return sizeof(uint32_t);

View File

@ -392,8 +392,8 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
/* Get max clock frequency info and convert it to MHz */
radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL,
&ws->info.max_shader_clock);
ws->info.max_shader_clock /= 1000;
&ws->info.max_gpu_freq_mhz);
ws->info.max_gpu_freq_mhz /= 1000;
ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);