ac/gpu_info: rework and extend device info to be more informative
This is the result with AMD_DEBUG=info:

Device info:
    name = NAVI23
    marketing_name = AMD Radeon RX 6600
    num_se = 2
    num_rb = 8
    num_cu = 28
    max_gpu_freq = 2750 MHz
    max_gflops = 9856 GFLOPS
    l0_cache_size = 16 KB
    l1_cache_size = 128 KB
    l2_cache_size = 2048 KB
    l3_cache_size = 32 MB
    memory_channels = 8 (TCC blocks)
    memory_size = 8 GB (8192 MB)
    memory_freq = 14 GHz
    memory_bus_width = 128 bits
    memory_bandwidth = 224 GB/s
    clock_crystal_freq = 100000 KHz
    IP GFX 10.3 queues:1
    IP COMP 10.3 queues:4
    IP SDMA 5.2 queues:2
    IP VCN_DEC 3.0 queues:1
    IP VCN_ENC 3.0 queues:1
    IP VCN_JPG 3.0 queues:1

It might not be 100% correct with other chips.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17411>
This commit is contained in:
parent
dd6b001775
commit
9552da66cc
|
@ -74,6 +74,19 @@
|
|||
#define AMDGPU_INFO_VIDEO_CAPS_DECODE 0
|
||||
#define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1
|
||||
#define AMDGPU_INFO_FW_GFX_MEC 0x08
|
||||
|
||||
#define AMDGPU_VRAM_TYPE_UNKNOWN 0
|
||||
#define AMDGPU_VRAM_TYPE_GDDR1 1
|
||||
#define AMDGPU_VRAM_TYPE_DDR2 2
|
||||
#define AMDGPU_VRAM_TYPE_GDDR3 3
|
||||
#define AMDGPU_VRAM_TYPE_GDDR4 4
|
||||
#define AMDGPU_VRAM_TYPE_GDDR5 5
|
||||
#define AMDGPU_VRAM_TYPE_HBM 6
|
||||
#define AMDGPU_VRAM_TYPE_DDR3 7
|
||||
#define AMDGPU_VRAM_TYPE_DDR4 8
|
||||
#define AMDGPU_VRAM_TYPE_GDDR6 9
|
||||
#define AMDGPU_VRAM_TYPE_DDR5 10
|
||||
|
||||
struct drm_amdgpu_heap_info {
|
||||
uint64_t total_heap_size;
|
||||
};
|
||||
|
@ -821,8 +834,8 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
|
||||
/* Set hardware information. */
|
||||
/* convert the shader/memory clocks from KHz to MHz */
|
||||
info->max_shader_clock = amdinfo->max_engine_clk / 1000;
|
||||
info->memory_freq_mhz = amdinfo->max_memory_clk / 1000;
|
||||
info->max_gpu_freq_mhz = amdinfo->max_engine_clk / 1000;
|
||||
info->memory_freq_mhz_effective = info->memory_freq_mhz = amdinfo->max_memory_clk / 1000;
|
||||
info->max_tcc_blocks = device_info.num_tcc_blocks;
|
||||
info->max_se = amdinfo->num_shader_engines;
|
||||
info->max_sa_per_se = amdinfo->num_shader_arrays_per_engine;
|
||||
|
@ -830,6 +843,23 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
info->vce_fw_version = info->ip[AMD_IP_VCE].num_queues ? vce_version : 0;
|
||||
info->has_video_hw.uvd_decode = info->ip[AMD_IP_UVD].num_queues != 0;
|
||||
|
||||
/* Based on MemoryOpsPerClockTable from PAL. */
|
||||
switch (info->vram_type) {
|
||||
case AMDGPU_VRAM_TYPE_DDR2:
|
||||
case AMDGPU_VRAM_TYPE_DDR3:
|
||||
case AMDGPU_VRAM_TYPE_DDR4: /* same for LPDDR4 */
|
||||
case AMDGPU_VRAM_TYPE_HBM: /* same for HBM2 and HBM3 */
|
||||
info->memory_freq_mhz_effective *= 2;
|
||||
break;
|
||||
case AMDGPU_VRAM_TYPE_DDR5: /* same for LPDDR5 */
|
||||
case AMDGPU_VRAM_TYPE_GDDR5:
|
||||
info->memory_freq_mhz_effective *= 4;
|
||||
break;
|
||||
case AMDGPU_VRAM_TYPE_GDDR6:
|
||||
info->memory_freq_mhz_effective *= 16;
|
||||
break;
|
||||
}
|
||||
|
||||
/* unified ring */
|
||||
info->has_video_hw.vcn_decode
|
||||
= info->family >= CHIP_GFX1100
|
||||
|
@ -1251,6 +1281,15 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
const unsigned max_waves_per_tg = 32; /* 1024 threads in Wave32 */
|
||||
info->max_scratch_waves = MAX2(32 * info->min_good_cu_per_sa * info->max_sa_per_se * info->num_se,
|
||||
max_waves_per_tg);
|
||||
info->num_rb = util_bitcount(info->enabled_rb_mask);
|
||||
info->max_gflops = info->num_cu * 128 * info->max_gpu_freq_mhz / 1000;
|
||||
info->memory_bandwidth_gbps = DIV_ROUND_UP(info->memory_freq_mhz_effective * info->memory_bus_width / 8, 1000);
|
||||
|
||||
if (info->gfx_level >= GFX10_3 && info->has_dedicated_vram) {
|
||||
info->l3_cache_size_mb = info->num_tcc_blocks *
|
||||
(info->family == CHIP_NAVI21 ||
|
||||
info->family == CHIP_NAVI22 ? 8 : 4);
|
||||
}
|
||||
|
||||
set_custom_cu_en_mask(info);
|
||||
|
||||
|
@ -1309,24 +1348,36 @@ void ac_compute_device_uuid(struct radeon_info *info, char *uuid, size_t size)
|
|||
void ac_print_gpu_info(struct radeon_info *info, FILE *f)
|
||||
{
|
||||
fprintf(f, "Device info:\n");
|
||||
fprintf(f, " pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n", info->pci_domain, info->pci_bus,
|
||||
info->pci_dev, info->pci_func);
|
||||
|
||||
fprintf(f, " name = %s\n", info->name);
|
||||
fprintf(f, " lowercase_name = %s\n", info->lowercase_name);
|
||||
fprintf(f, " marketing_name = %s\n", info->marketing_name);
|
||||
fprintf(f, " is_pro_graphics = %u\n", info->is_pro_graphics);
|
||||
fprintf(f, " pci_id = 0x%x\n", info->pci_id);
|
||||
fprintf(f, " pci_rev_id = 0x%x\n", info->pci_rev_id);
|
||||
fprintf(f, " family = %i\n", info->family);
|
||||
fprintf(f, " gfx_level = %i\n", info->gfx_level);
|
||||
fprintf(f, " family_id = %i\n", info->family_id);
|
||||
fprintf(f, " chip_external_rev = %i\n", info->chip_external_rev);
|
||||
fprintf(f, " num_se = %i\n", info->num_se);
|
||||
fprintf(f, " num_rb = %i\n", info->num_rb);
|
||||
fprintf(f, " num_cu = %i\n", info->num_cu);
|
||||
fprintf(f, " max_gpu_freq = %i MHz\n", info->max_gpu_freq_mhz);
|
||||
fprintf(f, " max_gflops = %u GFLOPS\n", info->max_gflops);
|
||||
|
||||
if (info->gfx_level >= GFX10) {
|
||||
fprintf(f, " l0_cache_size = %i KB\n", DIV_ROUND_UP(info->l1_cache_size, 1024));
|
||||
fprintf(f, " l1_cache_size = %i KB\n", 128);
|
||||
} else {
|
||||
fprintf(f, " l1_cache_size = %i KB\n", DIV_ROUND_UP(info->l1_cache_size, 1024));
|
||||
}
|
||||
|
||||
fprintf(f, " l2_cache_size = %i KB\n", DIV_ROUND_UP(info->l2_cache_size, 1024));
|
||||
|
||||
if (info->l3_cache_size_mb)
|
||||
fprintf(f, " l3_cache_size = %i MB\n", info->l3_cache_size_mb);
|
||||
|
||||
fprintf(f, " memory_channels = %u (TCC blocks)\n", info->num_tcc_blocks);
|
||||
fprintf(f, " memory_size = %u GB (%u MB)\n",
|
||||
DIV_ROUND_UP(info->vram_size_kb, (1024 * 1024)),
|
||||
DIV_ROUND_UP(info->vram_size_kb, 1024));
|
||||
fprintf(f, " memory_freq = %u GHz\n", DIV_ROUND_UP(info->memory_freq_mhz_effective, 1000));
|
||||
fprintf(f, " memory_bus_width = %u bits\n", info->memory_bus_width);
|
||||
fprintf(f, " memory_bandwidth = %u GB/s\n", info->memory_bandwidth_gbps);
|
||||
fprintf(f, " clock_crystal_freq = %i KHz\n", info->clock_crystal_freq);
|
||||
|
||||
fprintf(f, "Features:\n");
|
||||
|
||||
static const char *ip_string[] = {
|
||||
const char *ip_string[] = {
|
||||
[AMD_IP_GFX] = "GFX",
|
||||
[AMD_IP_COMPUTE] = "COMP",
|
||||
[AMD_IP_SDMA] = "SDMA",
|
||||
|
@ -1334,20 +1385,29 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
|
|||
[AMD_IP_VCE] = "VCE",
|
||||
[AMD_IP_UVD_ENC] = "UVD_ENC",
|
||||
[AMD_IP_VCN_DEC] = "VCN_DEC",
|
||||
[AMD_IP_VCN_ENC] = "VCN_ENC",
|
||||
[AMD_IP_VCN_ENC] = info->family >= CHIP_GFX1100 ? "VCN" : "VCN_ENC",
|
||||
[AMD_IP_VCN_JPEG] = "VCN_JPG",
|
||||
};
|
||||
|
||||
for (unsigned i = 0; i < AMD_NUM_IP_TYPES; i++) {
|
||||
if (info->ip[i].num_queues) {
|
||||
fprintf(f, " IP %-4s %2u.%u \tqueues:%u\n", ip_string[i],
|
||||
fprintf(f, " IP %-7s %2u.%u \tqueues:%u\n", ip_string[i],
|
||||
info->ip[i].ver_major, info->ip[i].ver_minor, info->ip[i].num_queues);
|
||||
}
|
||||
}
|
||||
|
||||
if (info->family >= CHIP_GFX1100)
|
||||
ip_string[AMD_IP_VCN_UNIFIED] = "VCN_UNIFIED";
|
||||
fprintf(f, "Identification:\n");
|
||||
fprintf(f, " pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n", info->pci_domain, info->pci_bus,
|
||||
info->pci_dev, info->pci_func);
|
||||
fprintf(f, " pci_id = 0x%x\n", info->pci_id);
|
||||
fprintf(f, " pci_rev_id = 0x%x\n", info->pci_rev_id);
|
||||
fprintf(f, " family = %i\n", info->family);
|
||||
fprintf(f, " gfx_level = %i\n", info->gfx_level);
|
||||
fprintf(f, " family_id = %i\n", info->family_id);
|
||||
fprintf(f, " chip_external_rev = %i\n", info->chip_external_rev);
|
||||
|
||||
fprintf(f, "Flags:\n");
|
||||
fprintf(f, " is_pro_graphics = %u\n", info->is_pro_graphics);
|
||||
fprintf(f, " has_graphics = %i\n", info->has_graphics);
|
||||
fprintf(f, " has_clear_state = %u\n", info->has_clear_state);
|
||||
fprintf(f, " has_distributed_tess = %u\n", info->has_distributed_tess);
|
||||
|
@ -1380,7 +1440,6 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
|
|||
fprintf(f, " vram_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_size, 1024 * 1024));
|
||||
fprintf(f, " vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_vis_size, 1024 * 1024));
|
||||
fprintf(f, " vram_type = %i\n", info->vram_type);
|
||||
fprintf(f, " memory_bus_width = %i\n", info->memory_bus_width);
|
||||
fprintf(f, " max_heap_size_kb = %i MB\n", (int)DIV_ROUND_UP(info->max_heap_size_kb, 1024));
|
||||
fprintf(f, " min_alloc_size = %u\n", info->min_alloc_size);
|
||||
fprintf(f, " address32_hi = 0x%x\n", info->address32_hi);
|
||||
|
@ -1388,16 +1447,13 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
|
|||
fprintf(f, " all_vram_visible = %u\n", info->all_vram_visible);
|
||||
fprintf(f, " smart_access_memory = %u\n", info->smart_access_memory);
|
||||
fprintf(f, " max_tcc_blocks = %i\n", info->max_tcc_blocks);
|
||||
fprintf(f, " num_tcc_blocks = %i\n", info->num_tcc_blocks);
|
||||
fprintf(f, " tcc_cache_line_size = %u\n", info->tcc_cache_line_size);
|
||||
fprintf(f, " tcc_rb_non_coherent = %u\n", info->tcc_rb_non_coherent);
|
||||
fprintf(f, " pc_lines = %u\n", info->pc_lines);
|
||||
fprintf(f, " lds_size_per_workgroup = %u\n", info->lds_size_per_workgroup);
|
||||
fprintf(f, " lds_alloc_granularity = %i\n", info->lds_alloc_granularity);
|
||||
fprintf(f, " lds_encode_granularity = %i\n", info->lds_encode_granularity);
|
||||
fprintf(f, " memory_freq = %i MHz\n", info->memory_freq_mhz);
|
||||
fprintf(f, " l1_cache_size = %i\n", info->l1_cache_size);
|
||||
fprintf(f, " l2_cache_size = %i\n", info->l2_cache_size);
|
||||
fprintf(f, " max_memory_clock = %i MHz\n", info->memory_freq_mhz);
|
||||
|
||||
fprintf(f, "CP info:\n");
|
||||
fprintf(f, " gfx_ib_pad_with_type2 = %i\n", info->gfx_ib_pad_with_type2);
|
||||
|
@ -1461,12 +1517,9 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
|
|||
}
|
||||
}
|
||||
fprintf(f, " spi_cu_en_has_effect = %i\n", info->spi_cu_en_has_effect);
|
||||
fprintf(f, " max_shader_clock = %i MHz\n", info->max_shader_clock);
|
||||
fprintf(f, " num_cu = %i\n", info->num_cu);
|
||||
fprintf(f, " max_good_cu_per_sa = %i\n", info->max_good_cu_per_sa);
|
||||
fprintf(f, " min_good_cu_per_sa = %i\n", info->min_good_cu_per_sa);
|
||||
fprintf(f, " max_se = %i\n", info->max_se);
|
||||
fprintf(f, " num_se = %i\n", info->num_se);
|
||||
fprintf(f, " max_sa_per_se = %i\n", info->max_sa_per_se);
|
||||
fprintf(f, " max_wave64_per_simd = %i\n", info->max_wave64_per_simd);
|
||||
fprintf(f, " num_physical_sgprs_per_simd = %i\n", info->num_physical_sgprs_per_simd);
|
||||
|
|
|
@ -49,27 +49,42 @@ struct amd_ip_info {
|
|||
};
|
||||
|
||||
struct radeon_info {
|
||||
/* Device info. */
|
||||
const char *name;
|
||||
char lowercase_name[32];
|
||||
const char *marketing_name;
|
||||
uint32_t num_se; /* only enabled SEs */
|
||||
uint32_t num_rb; /* only enabled RBs */
|
||||
uint32_t num_cu; /* only enabled CUs */
|
||||
uint32_t max_gpu_freq_mhz; /* also known as the shader clock */
|
||||
uint32_t max_gflops;
|
||||
uint32_t l1_cache_size;
|
||||
uint32_t l2_cache_size;
|
||||
uint32_t l3_cache_size_mb;
|
||||
uint32_t num_tcc_blocks; /* also the number of memory channels */
|
||||
uint32_t memory_freq_mhz;
|
||||
uint32_t memory_freq_mhz_effective;
|
||||
uint32_t memory_bus_width;
|
||||
uint32_t memory_bandwidth_gbps;
|
||||
uint32_t clock_crystal_freq;
|
||||
struct amd_ip_info ip[AMD_NUM_IP_TYPES];
|
||||
|
||||
/* Identification. */
|
||||
/* PCI info: domain:bus:dev:func */
|
||||
uint32_t pci_domain;
|
||||
uint32_t pci_bus;
|
||||
uint32_t pci_dev;
|
||||
uint32_t pci_func;
|
||||
|
||||
/* Device info. */
|
||||
const char *name;
|
||||
char lowercase_name[32];
|
||||
const char *marketing_name;
|
||||
bool is_pro_graphics;
|
||||
uint32_t pci_id;
|
||||
uint32_t pci_rev_id;
|
||||
enum radeon_family family;
|
||||
enum amd_gfx_level gfx_level;
|
||||
uint32_t family_id;
|
||||
uint32_t chip_external_rev;
|
||||
uint32_t clock_crystal_freq;
|
||||
|
||||
/* Features. */
|
||||
struct amd_ip_info ip[AMD_NUM_IP_TYPES];
|
||||
/* Flags. */
|
||||
bool is_pro_graphics;
|
||||
bool has_graphics; /* false if the chip is compute-only */
|
||||
uint32_t ib_pad_dw_mask[AMD_NUM_IP_TYPES];
|
||||
bool has_clear_state;
|
||||
|
@ -114,7 +129,6 @@ struct radeon_info {
|
|||
uint64_t gart_size;
|
||||
uint64_t vram_size;
|
||||
uint64_t vram_vis_size;
|
||||
uint32_t memory_bus_width;
|
||||
uint32_t vram_type;
|
||||
uint32_t max_heap_size_kb;
|
||||
uint32_t min_alloc_size;
|
||||
|
@ -125,16 +139,12 @@ struct radeon_info {
|
|||
bool has_l2_uncached;
|
||||
bool r600_has_virtual_memory;
|
||||
uint32_t max_tcc_blocks;
|
||||
uint32_t num_tcc_blocks;
|
||||
uint32_t tcc_cache_line_size;
|
||||
bool tcc_rb_non_coherent; /* whether L2 inv is needed for render->texture transitions */
|
||||
unsigned pc_lines;
|
||||
uint32_t lds_size_per_workgroup;
|
||||
uint32_t lds_alloc_granularity;
|
||||
uint32_t lds_encode_granularity;
|
||||
uint32_t memory_freq_mhz;
|
||||
uint32_t l1_cache_size;
|
||||
uint32_t l2_cache_size;
|
||||
|
||||
/* CP info. */
|
||||
bool gfx_ib_pad_with_type2;
|
||||
|
@ -203,12 +213,9 @@ struct radeon_info {
|
|||
/* Shader cores. */
|
||||
uint32_t cu_mask[AMD_MAX_SE][AMD_MAX_SA_PER_SE];
|
||||
uint32_t r600_max_quad_pipes; /* wave size / 16 */
|
||||
uint32_t max_shader_clock;
|
||||
uint32_t num_cu; /* only enabled CUs */
|
||||
uint32_t max_good_cu_per_sa;
|
||||
uint32_t min_good_cu_per_sa; /* min != max if SAs have different # of CUs */
|
||||
uint32_t max_se; /* number of shader engines incl. disabled ones */
|
||||
uint32_t num_se; /* number of enabled shader engines */
|
||||
uint32_t max_sa_per_se; /* shader arrays per shader engine */
|
||||
uint32_t max_wave64_per_simd;
|
||||
uint32_t num_physical_sgprs_per_simd;
|
||||
|
|
|
@ -451,8 +451,8 @@ static void ac_sqtt_fill_asic_info(struct radeon_info *rad_info,
|
|||
if (rad_info->gfx_level >= GFX9)
|
||||
chunk->flags |= SQTT_FILE_CHUNK_ASIC_INFO_FLAG_PS1_EVENT_TOKENS_ENABLED;
|
||||
|
||||
chunk->trace_shader_core_clock = rad_info->max_shader_clock * 1000000;
|
||||
chunk->trace_memory_clock = rad_info->memory_freq_mhz * 1000000;
|
||||
chunk->trace_shader_core_clock = rad_info->max_gpu_freq_mhz * 1000000ull;
|
||||
chunk->trace_memory_clock = rad_info->memory_freq_mhz * 1000000ull;
|
||||
|
||||
/* RGP gets very confused if these clocks are 0. The numbers here are for profile_peak on
|
||||
* VGH since that is the chips where we've seen the need for this workaround. */
|
||||
|
@ -506,7 +506,7 @@ static void ac_sqtt_fill_asic_info(struct radeon_info *rad_info,
|
|||
chunk->pixels_per_clock = 0.0;
|
||||
|
||||
chunk->gpu_timestamp_frequency = rad_info->clock_crystal_freq * 1000;
|
||||
chunk->max_shader_core_clock = rad_info->max_shader_clock * 1000000;
|
||||
chunk->max_shader_core_clock = rad_info->max_gpu_freq_mhz * 1000000;
|
||||
chunk->max_memory_clock = rad_info->memory_freq_mhz * 1000000;
|
||||
chunk->memory_ops_per_clock = ac_memory_ops_per_clock(rad_info->vram_type);
|
||||
chunk->memory_chip_type = ac_vram_type_to_sqtt_memory_type(rad_info->vram_type);
|
||||
|
|
|
@ -1022,7 +1022,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
|
|||
case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
|
||||
if (ret) {
|
||||
uint32_t *max_clock_frequency = ret;
|
||||
*max_clock_frequency = rscreen->info.max_shader_clock;
|
||||
*max_clock_frequency = rscreen->info.max_gpu_freq_mhz;
|
||||
}
|
||||
return sizeof(uint32_t);
|
||||
|
||||
|
@ -1311,7 +1311,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
|
|||
printf("has_syncobj = %u\n", rscreen->info.has_syncobj);
|
||||
|
||||
printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
|
||||
printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
|
||||
printf("max_gpu_freq_mhz = %i\n", rscreen->info.max_gpu_freq_mhz);
|
||||
printf("num_cu = %i\n", rscreen->info.num_cu);
|
||||
printf("max_se = %i\n", rscreen->info.max_se);
|
||||
printf("max_sh_per_se = %i\n", rscreen->info.max_sa_per_se);
|
||||
|
|
|
@ -927,7 +927,7 @@ static int si_get_compute_param(struct pipe_screen *screen, enum pipe_shader_ir
|
|||
case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
|
||||
if (ret) {
|
||||
uint32_t *max_clock_frequency = ret;
|
||||
*max_clock_frequency = sscreen->info.max_shader_clock;
|
||||
*max_clock_frequency = sscreen->info.max_gpu_freq_mhz;
|
||||
}
|
||||
return sizeof(uint32_t);
|
||||
|
||||
|
|
|
@ -392,8 +392,8 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
|
|||
|
||||
/* Get max clock frequency info and convert it to MHz */
|
||||
radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL,
|
||||
&ws->info.max_shader_clock);
|
||||
ws->info.max_shader_clock /= 1000;
|
||||
&ws->info.max_gpu_freq_mhz);
|
||||
ws->info.max_gpu_freq_mhz /= 1000;
|
||||
|
||||
ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
|
||||
|
|
Loading…
Reference in New Issue