From 9552da66cc9fb640bcf37826b193c75ad1fb8848 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 3 Jul 2022 15:27:59 -0400 Subject: [PATCH] ac/gpu_info: rework and extend device info to be more informative This is the result with AMD_DEBUG=info: Device info: name = NAVI23 marketing_name = AMD Radeon RX 6600 num_se = 2 num_rb = 8 num_cu = 28 max_gpu_freq = 2750 MHz max_gflops = 9856 GFLOPS l0_cache_size = 16 KB l1_cache_size = 128 KB l2_cache_size = 2048 KB l3_cache_size = 32 MB memory_channels = 8 (TCC blocks) memory_size = 8 GB (8192 MB) memory_freq = 14 GHz memory_bus_width = 128 bits memory_bandwidth = 224 GB/s clock_crystal_freq = 100000 KHz IP GFX 10.3 queues:1 IP COMP 10.3 queues:4 IP SDMA 5.2 queues:2 IP VCN_DEC 3.0 queues:1 IP VCN_ENC 3.0 queues:1 IP VCN_JPG 3.0 queues:1 It might not be 100% correct with other chips. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/common/ac_gpu_info.c | 109 +++++++++++++----- src/amd/common/ac_gpu_info.h | 39 ++++--- src/amd/common/ac_rgp.c | 6 +- src/gallium/drivers/r600/r600_pipe_common.c | 4 +- src/gallium/drivers/radeonsi/si_get.c | 2 +- .../winsys/radeon/drm/radeon_drm_winsys.c | 4 +- 6 files changed, 112 insertions(+), 52 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 31a74a9749f..972638e171d 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -74,6 +74,19 @@ #define AMDGPU_INFO_VIDEO_CAPS_DECODE 0 #define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1 #define AMDGPU_INFO_FW_GFX_MEC 0x08 + +#define AMDGPU_VRAM_TYPE_UNKNOWN 0 +#define AMDGPU_VRAM_TYPE_GDDR1 1 +#define AMDGPU_VRAM_TYPE_DDR2 2 +#define AMDGPU_VRAM_TYPE_GDDR3 3 +#define AMDGPU_VRAM_TYPE_GDDR4 4 +#define AMDGPU_VRAM_TYPE_GDDR5 5 +#define AMDGPU_VRAM_TYPE_HBM 6 +#define AMDGPU_VRAM_TYPE_DDR3 7 +#define AMDGPU_VRAM_TYPE_DDR4 8 +#define AMDGPU_VRAM_TYPE_GDDR6 9 +#define AMDGPU_VRAM_TYPE_DDR5 10 + struct drm_amdgpu_heap_info { uint64_t total_heap_size; }; @@ 
-821,8 +834,8 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, /* Set hardware information. */ /* convert the shader/memory clocks from KHz to MHz */ - info->max_shader_clock = amdinfo->max_engine_clk / 1000; - info->memory_freq_mhz = amdinfo->max_memory_clk / 1000; + info->max_gpu_freq_mhz = amdinfo->max_engine_clk / 1000; + info->memory_freq_mhz_effective = info->memory_freq_mhz = amdinfo->max_memory_clk / 1000; info->max_tcc_blocks = device_info.num_tcc_blocks; info->max_se = amdinfo->num_shader_engines; info->max_sa_per_se = amdinfo->num_shader_arrays_per_engine; @@ -830,6 +843,23 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, info->vce_fw_version = info->ip[AMD_IP_VCE].num_queues ? vce_version : 0; info->has_video_hw.uvd_decode = info->ip[AMD_IP_UVD].num_queues != 0; + /* Based on MemoryOpsPerClockTable from PAL. */ + switch (info->vram_type) { + case AMDGPU_VRAM_TYPE_DDR2: + case AMDGPU_VRAM_TYPE_DDR3: + case AMDGPU_VRAM_TYPE_DDR4: /* same for LPDDR4 */ + case AMDGPU_VRAM_TYPE_HBM: /* same for HBM2 and HBM3 */ + info->memory_freq_mhz_effective *= 2; + break; + case AMDGPU_VRAM_TYPE_DDR5: /* same for LPDDR5 */ + case AMDGPU_VRAM_TYPE_GDDR5: + info->memory_freq_mhz_effective *= 4; + break; + case AMDGPU_VRAM_TYPE_GDDR6: + info->memory_freq_mhz_effective *= 16; + break; + } + /* unified ring */ info->has_video_hw.vcn_decode = info->family >= CHIP_GFX1100 @@ -1251,6 +1281,15 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, const unsigned max_waves_per_tg = 32; /* 1024 threads in Wave32 */ info->max_scratch_waves = MAX2(32 * info->min_good_cu_per_sa * info->max_sa_per_se * info->num_se, max_waves_per_tg); + info->num_rb = util_bitcount(info->enabled_rb_mask); + info->max_gflops = info->num_cu * 128 * info->max_gpu_freq_mhz / 1000; + info->memory_bandwidth_gbps = DIV_ROUND_UP(info->memory_freq_mhz_effective * info->memory_bus_width / 8, 1000); + + if (info->gfx_level >= GFX10_3 && 
info->has_dedicated_vram) { + info->l3_cache_size_mb = info->num_tcc_blocks * + (info->family == CHIP_NAVI21 || + info->family == CHIP_NAVI22 ? 8 : 4); + } set_custom_cu_en_mask(info); @@ -1309,24 +1348,36 @@ void ac_compute_device_uuid(struct radeon_info *info, char *uuid, size_t size) void ac_print_gpu_info(struct radeon_info *info, FILE *f) { fprintf(f, "Device info:\n"); - fprintf(f, " pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n", info->pci_domain, info->pci_bus, - info->pci_dev, info->pci_func); - fprintf(f, " name = %s\n", info->name); - fprintf(f, " lowercase_name = %s\n", info->lowercase_name); fprintf(f, " marketing_name = %s\n", info->marketing_name); - fprintf(f, " is_pro_graphics = %u\n", info->is_pro_graphics); - fprintf(f, " pci_id = 0x%x\n", info->pci_id); - fprintf(f, " pci_rev_id = 0x%x\n", info->pci_rev_id); - fprintf(f, " family = %i\n", info->family); - fprintf(f, " gfx_level = %i\n", info->gfx_level); - fprintf(f, " family_id = %i\n", info->family_id); - fprintf(f, " chip_external_rev = %i\n", info->chip_external_rev); + fprintf(f, " num_se = %i\n", info->num_se); + fprintf(f, " num_rb = %i\n", info->num_rb); + fprintf(f, " num_cu = %i\n", info->num_cu); + fprintf(f, " max_gpu_freq = %i MHz\n", info->max_gpu_freq_mhz); + fprintf(f, " max_gflops = %u GFLOPS\n", info->max_gflops); + + if (info->gfx_level >= GFX10) { + fprintf(f, " l0_cache_size = %i KB\n", DIV_ROUND_UP(info->l1_cache_size, 1024)); + fprintf(f, " l1_cache_size = %i KB\n", 128); + } else { + fprintf(f, " l1_cache_size = %i KB\n", DIV_ROUND_UP(info->l1_cache_size, 1024)); + } + + fprintf(f, " l2_cache_size = %i KB\n", DIV_ROUND_UP(info->l2_cache_size, 1024)); + + if (info->l3_cache_size_mb) + fprintf(f, " l3_cache_size = %i MB\n", info->l3_cache_size_mb); + + fprintf(f, " memory_channels = %u (TCC blocks)\n", info->num_tcc_blocks); + fprintf(f, " memory_size = %u GB (%u MB)\n", + DIV_ROUND_UP(info->vram_size_kb, (1024 * 1024)), + DIV_ROUND_UP(info->vram_size_kb, 1024)); + 
fprintf(f, " memory_freq = %u GHz\n", DIV_ROUND_UP(info->memory_freq_mhz_effective, 1000)); + fprintf(f, " memory_bus_width = %u bits\n", info->memory_bus_width); + fprintf(f, " memory_bandwidth = %u GB/s\n", info->memory_bandwidth_gbps); fprintf(f, " clock_crystal_freq = %i KHz\n", info->clock_crystal_freq); - fprintf(f, "Features:\n"); - - static const char *ip_string[] = { + const char *ip_string[] = { [AMD_IP_GFX] = "GFX", [AMD_IP_COMPUTE] = "COMP", [AMD_IP_SDMA] = "SDMA", @@ -1334,20 +1385,29 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f) [AMD_IP_VCE] = "VCE", [AMD_IP_UVD_ENC] = "UVD_ENC", [AMD_IP_VCN_DEC] = "VCN_DEC", - [AMD_IP_VCN_ENC] = "VCN_ENC", + [AMD_IP_VCN_ENC] = info->family >= CHIP_GFX1100 ? "VCN" : "VCN_ENC", [AMD_IP_VCN_JPEG] = "VCN_JPG", }; for (unsigned i = 0; i < AMD_NUM_IP_TYPES; i++) { if (info->ip[i].num_queues) { - fprintf(f, " IP %-4s %2u.%u \tqueues:%u\n", ip_string[i], + fprintf(f, " IP %-7s %2u.%u \tqueues:%u\n", ip_string[i], info->ip[i].ver_major, info->ip[i].ver_minor, info->ip[i].num_queues); } } - if (info->family >= CHIP_GFX1100) - ip_string[AMD_IP_VCN_UNIFIED] = "VCN_UNIFIED"; + fprintf(f, "Identification:\n"); + fprintf(f, " pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n", info->pci_domain, info->pci_bus, + info->pci_dev, info->pci_func); + fprintf(f, " pci_id = 0x%x\n", info->pci_id); + fprintf(f, " pci_rev_id = 0x%x\n", info->pci_rev_id); + fprintf(f, " family = %i\n", info->family); + fprintf(f, " gfx_level = %i\n", info->gfx_level); + fprintf(f, " family_id = %i\n", info->family_id); + fprintf(f, " chip_external_rev = %i\n", info->chip_external_rev); + fprintf(f, "Flags:\n"); + fprintf(f, " is_pro_graphics = %u\n", info->is_pro_graphics); fprintf(f, " has_graphics = %i\n", info->has_graphics); fprintf(f, " has_clear_state = %u\n", info->has_clear_state); fprintf(f, " has_distributed_tess = %u\n", info->has_distributed_tess); @@ -1380,7 +1440,6 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f) 
fprintf(f, " vram_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_size, 1024 * 1024)); fprintf(f, " vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_vis_size, 1024 * 1024)); fprintf(f, " vram_type = %i\n", info->vram_type); - fprintf(f, " memory_bus_width = %i\n", info->memory_bus_width); fprintf(f, " max_heap_size_kb = %i MB\n", (int)DIV_ROUND_UP(info->max_heap_size_kb, 1024)); fprintf(f, " min_alloc_size = %u\n", info->min_alloc_size); fprintf(f, " address32_hi = 0x%x\n", info->address32_hi); @@ -1388,16 +1447,13 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f) fprintf(f, " all_vram_visible = %u\n", info->all_vram_visible); fprintf(f, " smart_access_memory = %u\n", info->smart_access_memory); fprintf(f, " max_tcc_blocks = %i\n", info->max_tcc_blocks); - fprintf(f, " num_tcc_blocks = %i\n", info->num_tcc_blocks); fprintf(f, " tcc_cache_line_size = %u\n", info->tcc_cache_line_size); fprintf(f, " tcc_rb_non_coherent = %u\n", info->tcc_rb_non_coherent); fprintf(f, " pc_lines = %u\n", info->pc_lines); fprintf(f, " lds_size_per_workgroup = %u\n", info->lds_size_per_workgroup); fprintf(f, " lds_alloc_granularity = %i\n", info->lds_alloc_granularity); fprintf(f, " lds_encode_granularity = %i\n", info->lds_encode_granularity); - fprintf(f, " memory_freq = %i MHz\n", info->memory_freq_mhz); - fprintf(f, " l1_cache_size = %i\n", info->l1_cache_size); - fprintf(f, " l2_cache_size = %i\n", info->l2_cache_size); + fprintf(f, " max_memory_clock = %i MHz\n", info->memory_freq_mhz); fprintf(f, "CP info:\n"); fprintf(f, " gfx_ib_pad_with_type2 = %i\n", info->gfx_ib_pad_with_type2); @@ -1461,12 +1517,9 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f) } } fprintf(f, " spi_cu_en_has_effect = %i\n", info->spi_cu_en_has_effect); - fprintf(f, " max_shader_clock = %i MHz\n", info->max_shader_clock); - fprintf(f, " num_cu = %i\n", info->num_cu); fprintf(f, " max_good_cu_per_sa = %i\n", info->max_good_cu_per_sa); fprintf(f, " min_good_cu_per_sa = %i\n", 
info->min_good_cu_per_sa); fprintf(f, " max_se = %i\n", info->max_se); - fprintf(f, " num_se = %i\n", info->num_se); fprintf(f, " max_sa_per_se = %i\n", info->max_sa_per_se); fprintf(f, " max_wave64_per_simd = %i\n", info->max_wave64_per_simd); fprintf(f, " num_physical_sgprs_per_simd = %i\n", info->num_physical_sgprs_per_simd); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 52f77910fd9..1048872fd17 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -49,27 +49,42 @@ struct amd_ip_info { }; struct radeon_info { + /* Device info. */ + const char *name; + char lowercase_name[32]; + const char *marketing_name; + uint32_t num_se; /* only enabled SEs */ + uint32_t num_rb; /* only enabled RBs */ + uint32_t num_cu; /* only enabled CUs */ + uint32_t max_gpu_freq_mhz; /* also known as the shader clock */ + uint32_t max_gflops; + uint32_t l1_cache_size; + uint32_t l2_cache_size; + uint32_t l3_cache_size_mb; + uint32_t num_tcc_blocks; /* also the number of memory channels */ + uint32_t memory_freq_mhz; + uint32_t memory_freq_mhz_effective; + uint32_t memory_bus_width; + uint32_t memory_bandwidth_gbps; + uint32_t clock_crystal_freq; + struct amd_ip_info ip[AMD_NUM_IP_TYPES]; + + /* Identification. */ /* PCI info: domain:bus:dev:func */ uint32_t pci_domain; uint32_t pci_bus; uint32_t pci_dev; uint32_t pci_func; - /* Device info. */ - const char *name; - char lowercase_name[32]; - const char *marketing_name; - bool is_pro_graphics; uint32_t pci_id; uint32_t pci_rev_id; enum radeon_family family; enum amd_gfx_level gfx_level; uint32_t family_id; uint32_t chip_external_rev; - uint32_t clock_crystal_freq; - /* Features. */ - struct amd_ip_info ip[AMD_NUM_IP_TYPES]; + /* Flags. 
*/ + bool is_pro_graphics; bool has_graphics; /* false if the chip is compute-only */ uint32_t ib_pad_dw_mask[AMD_NUM_IP_TYPES]; bool has_clear_state; @@ -114,7 +129,6 @@ struct radeon_info { uint64_t gart_size; uint64_t vram_size; uint64_t vram_vis_size; - uint32_t memory_bus_width; uint32_t vram_type; uint32_t max_heap_size_kb; uint32_t min_alloc_size; @@ -125,16 +139,12 @@ struct radeon_info { bool has_l2_uncached; bool r600_has_virtual_memory; uint32_t max_tcc_blocks; - uint32_t num_tcc_blocks; uint32_t tcc_cache_line_size; bool tcc_rb_non_coherent; /* whether L2 inv is needed for render->texture transitions */ unsigned pc_lines; uint32_t lds_size_per_workgroup; uint32_t lds_alloc_granularity; uint32_t lds_encode_granularity; - uint32_t memory_freq_mhz; - uint32_t l1_cache_size; - uint32_t l2_cache_size; /* CP info. */ bool gfx_ib_pad_with_type2; @@ -203,12 +213,9 @@ struct radeon_info { /* Shader cores. */ uint32_t cu_mask[AMD_MAX_SE][AMD_MAX_SA_PER_SE]; uint32_t r600_max_quad_pipes; /* wave size / 16 */ - uint32_t max_shader_clock; - uint32_t num_cu; /* only enabled CUs */ uint32_t max_good_cu_per_sa; uint32_t min_good_cu_per_sa; /* min != max if SAs have different # of CUs */ uint32_t max_se; /* number of shader engines incl. 
disabled ones */ - uint32_t num_se; /* number of enabled shader engines */ uint32_t max_sa_per_se; /* shader arrays per shader engine */ uint32_t max_wave64_per_simd; uint32_t num_physical_sgprs_per_simd; diff --git a/src/amd/common/ac_rgp.c b/src/amd/common/ac_rgp.c index f2b3b831b6c..438ad78f492 100644 --- a/src/amd/common/ac_rgp.c +++ b/src/amd/common/ac_rgp.c @@ -451,8 +451,8 @@ static void ac_sqtt_fill_asic_info(struct radeon_info *rad_info, if (rad_info->gfx_level >= GFX9) chunk->flags |= SQTT_FILE_CHUNK_ASIC_INFO_FLAG_PS1_EVENT_TOKENS_ENABLED; - chunk->trace_shader_core_clock = rad_info->max_shader_clock * 1000000; - chunk->trace_memory_clock = rad_info->memory_freq_mhz * 1000000; + chunk->trace_shader_core_clock = rad_info->max_gpu_freq_mhz * 1000000ull; + chunk->trace_memory_clock = rad_info->memory_freq_mhz * 1000000ull; /* RGP gets very confused if these clocks are 0. The numbers here are for profile_peak on * VGH since that is the chips where we've seen the need for this workaround. 
*/ @@ -506,7 +506,7 @@ static void ac_sqtt_fill_asic_info(struct radeon_info *rad_info, chunk->pixels_per_clock = 0.0; chunk->gpu_timestamp_frequency = rad_info->clock_crystal_freq * 1000; - chunk->max_shader_core_clock = rad_info->max_shader_clock * 1000000; + chunk->max_shader_core_clock = rad_info->max_gpu_freq_mhz * 1000000ull; chunk->max_memory_clock = rad_info->memory_freq_mhz * 1000000; chunk->memory_ops_per_clock = ac_memory_ops_per_clock(rad_info->vram_type); chunk->memory_chip_type = ac_vram_type_to_sqtt_memory_type(rad_info->vram_type); diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c index f35dc8245ef..5fd2e9377d0 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.c +++ b/src/gallium/drivers/r600/r600_pipe_common.c @@ -1022,7 +1022,7 @@ static int r600_get_compute_param(struct pipe_screen *screen, case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: if (ret) { uint32_t *max_clock_frequency = ret; - *max_clock_frequency = rscreen->info.max_shader_clock; + *max_clock_frequency = rscreen->info.max_gpu_freq_mhz; } return sizeof(uint32_t); @@ -1311,7 +1311,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, printf("has_syncobj = %u\n", rscreen->info.has_syncobj); printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes); - printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock); + printf("max_gpu_freq_mhz = %i\n", rscreen->info.max_gpu_freq_mhz); printf("num_cu = %i\n", rscreen->info.num_cu); printf("max_se = %i\n", rscreen->info.max_se); printf("max_sh_per_se = %i\n", rscreen->info.max_sa_per_se); diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index aea36614138..06f21120662 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -927,7 +927,7 @@ static int si_get_compute_param(struct pipe_screen *screen, enum pipe_shader_ir case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: if (ret) { uint32_t 
*max_clock_frequency = ret; - *max_clock_frequency = sscreen->info.max_shader_clock; + *max_clock_frequency = sscreen->info.max_gpu_freq_mhz; } return sizeof(uint32_t); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index d354fc8ea61..08301075ab9 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -392,8 +392,8 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) /* Get max clock frequency info and convert it to MHz */ radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL, - &ws->info.max_shader_clock); - ws->info.max_shader_clock /= 1000; + &ws->info.max_gpu_freq_mhz); + ws->info.max_gpu_freq_mhz /= 1000; ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);