intel/perf: store a copy of devinfo
In the future we'll pull more information off devinfo.

v2: Constify pointers (Ian)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Marcin Ślusarz <marcin.slusarz@intel.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16144>
This commit is contained in:
parent
0df4b96062
commit
efc2782f97
|
@ -212,16 +212,16 @@ hw_vars = {}
|
||||||
hw_vars["$EuCoresTotalCount"] = "perf->sys_vars.n_eus"
|
hw_vars["$EuCoresTotalCount"] = "perf->sys_vars.n_eus"
|
||||||
hw_vars["$EuSlicesTotalCount"] = "perf->sys_vars.n_eu_slices"
|
hw_vars["$EuSlicesTotalCount"] = "perf->sys_vars.n_eu_slices"
|
||||||
hw_vars["$EuSubslicesTotalCount"] = "perf->sys_vars.n_eu_sub_slices"
|
hw_vars["$EuSubslicesTotalCount"] = "perf->sys_vars.n_eu_sub_slices"
|
||||||
hw_vars["$EuThreadsCount"] = "perf->sys_vars.eu_threads_count"
|
hw_vars["$EuThreadsCount"] = "perf->devinfo.num_thread_per_eu"
|
||||||
hw_vars["$SliceMask"] = "perf->sys_vars.slice_mask"
|
hw_vars["$SliceMask"] = "perf->sys_vars.slice_mask"
|
||||||
# subslice_mask is interchangeable with subslice/dual-subslice since Gfx12+
|
# subslice_mask is interchangeable with subslice/dual-subslice since Gfx12+
|
||||||
# only has dual subslices which can be assimilated with 16EUs subslices.
|
# only has dual subslices which can be assimilated with 16EUs subslices.
|
||||||
hw_vars["$SubsliceMask"] = "perf->sys_vars.subslice_mask"
|
hw_vars["$SubsliceMask"] = "perf->sys_vars.subslice_mask"
|
||||||
hw_vars["$DualSubsliceMask"] = "perf->sys_vars.subslice_mask"
|
hw_vars["$DualSubsliceMask"] = "perf->sys_vars.subslice_mask"
|
||||||
hw_vars["$GpuTimestampFrequency"] = "perf->sys_vars.timestamp_frequency"
|
hw_vars["$GpuTimestampFrequency"] = "perf->devinfo.timestamp_frequency"
|
||||||
hw_vars["$GpuMinFrequency"] = "perf->sys_vars.gt_min_freq"
|
hw_vars["$GpuMinFrequency"] = "perf->sys_vars.gt_min_freq"
|
||||||
hw_vars["$GpuMaxFrequency"] = "perf->sys_vars.gt_max_freq"
|
hw_vars["$GpuMaxFrequency"] = "perf->sys_vars.gt_max_freq"
|
||||||
hw_vars["$SkuRevisionId"] = "perf->sys_vars.revision"
|
hw_vars["$SkuRevisionId"] = "perf->devinfo.revision"
|
||||||
hw_vars["$QueryMode"] = "perf->sys_vars.query_mode"
|
hw_vars["$QueryMode"] = "perf->sys_vars.query_mode"
|
||||||
|
|
||||||
def output_rpn_equation_code(set, counter, equation):
|
def output_rpn_equation_code(set, counter, equation):
|
||||||
|
|
|
@ -347,9 +347,10 @@ init_oa_configs(struct intel_perf_config *perf, int fd,
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
compute_topology_builtins(struct intel_perf_config *perf,
|
compute_topology_builtins(struct intel_perf_config *perf)
|
||||||
const struct intel_device_info *devinfo)
|
|
||||||
{
|
{
|
||||||
|
const struct intel_device_info *devinfo = &perf->devinfo;
|
||||||
|
|
||||||
perf->sys_vars.slice_mask = devinfo->slice_masks;
|
perf->sys_vars.slice_mask = devinfo->slice_masks;
|
||||||
perf->sys_vars.n_eu_slices = devinfo->num_slices;
|
perf->sys_vars.n_eu_slices = devinfo->num_slices;
|
||||||
|
|
||||||
|
@ -361,8 +362,6 @@ compute_topology_builtins(struct intel_perf_config *perf,
|
||||||
for (int i = 0; i < sizeof(devinfo->eu_masks); i++)
|
for (int i = 0; i < sizeof(devinfo->eu_masks); i++)
|
||||||
perf->sys_vars.n_eus += util_bitcount(devinfo->eu_masks[i]);
|
perf->sys_vars.n_eus += util_bitcount(devinfo->eu_masks[i]);
|
||||||
|
|
||||||
perf->sys_vars.eu_threads_count = devinfo->num_thread_per_eu;
|
|
||||||
|
|
||||||
/* The subslice mask builtin contains bits for all slices. Prior to Gfx11
|
/* The subslice mask builtin contains bits for all slices. Prior to Gfx11
|
||||||
* it had groups of 3bits for each slice, on Gfx11 and above it's 8bits for
|
* it had groups of 3bits for each slice, on Gfx11 and above it's 8bits for
|
||||||
* each slice.
|
* each slice.
|
||||||
|
@ -384,7 +383,6 @@ compute_topology_builtins(struct intel_perf_config *perf,
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
init_oa_sys_vars(struct intel_perf_config *perf,
|
init_oa_sys_vars(struct intel_perf_config *perf,
|
||||||
const struct intel_device_info *devinfo,
|
|
||||||
bool use_register_snapshots)
|
bool use_register_snapshots)
|
||||||
{
|
{
|
||||||
uint64_t min_freq_mhz = 0, max_freq_mhz = 0;
|
uint64_t min_freq_mhz = 0, max_freq_mhz = 0;
|
||||||
|
@ -403,10 +401,8 @@ init_oa_sys_vars(struct intel_perf_config *perf,
|
||||||
memset(&perf->sys_vars, 0, sizeof(perf->sys_vars));
|
memset(&perf->sys_vars, 0, sizeof(perf->sys_vars));
|
||||||
perf->sys_vars.gt_min_freq = min_freq_mhz * 1000000;
|
perf->sys_vars.gt_min_freq = min_freq_mhz * 1000000;
|
||||||
perf->sys_vars.gt_max_freq = max_freq_mhz * 1000000;
|
perf->sys_vars.gt_max_freq = max_freq_mhz * 1000000;
|
||||||
perf->sys_vars.timestamp_frequency = devinfo->timestamp_frequency;
|
|
||||||
perf->sys_vars.revision = devinfo->revision;
|
|
||||||
perf->sys_vars.query_mode = use_register_snapshots;
|
perf->sys_vars.query_mode = use_register_snapshots;
|
||||||
compute_topology_builtins(perf, devinfo);
|
compute_topology_builtins(perf);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -700,6 +696,7 @@ oa_metrics_available(struct intel_perf_config *perf, int fd,
|
||||||
bool i915_perf_oa_available = false;
|
bool i915_perf_oa_available = false;
|
||||||
struct stat sb;
|
struct stat sb;
|
||||||
|
|
||||||
|
perf->devinfo = *devinfo;
|
||||||
perf->i915_query_supported = i915_query_perf_config_supported(perf, fd);
|
perf->i915_query_supported = i915_query_perf_config_supported(perf, fd);
|
||||||
perf->i915_perf_version = i915_perf_version(fd);
|
perf->i915_perf_version = i915_perf_version(fd);
|
||||||
|
|
||||||
|
@ -731,7 +728,7 @@ oa_metrics_available(struct intel_perf_config *perf, int fd,
|
||||||
return i915_perf_oa_available &&
|
return i915_perf_oa_available &&
|
||||||
oa_register &&
|
oa_register &&
|
||||||
get_sysfs_dev_dir(perf, fd) &&
|
get_sysfs_dev_dir(perf, fd) &&
|
||||||
init_oa_sys_vars(perf, devinfo, use_register_snapshots);
|
init_oa_sys_vars(perf, use_register_snapshots);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -1037,7 +1034,6 @@ intel_perf_report_timestamp(const struct intel_perf_query_info *query,
|
||||||
void
|
void
|
||||||
intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
|
intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
|
||||||
const struct intel_perf_query_info *query,
|
const struct intel_perf_query_info *query,
|
||||||
const struct intel_device_info *devinfo,
|
|
||||||
const uint32_t *start,
|
const uint32_t *start,
|
||||||
const uint32_t *end)
|
const uint32_t *end)
|
||||||
{
|
{
|
||||||
|
@ -1072,7 +1068,7 @@ intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
|
||||||
result->accumulator + query->a_offset + 32 + i);
|
result->accumulator + query->a_offset + 32 + i);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (can_use_mi_rpc_bc_counters(devinfo)) {
|
if (can_use_mi_rpc_bc_counters(&query->perf->devinfo)) {
|
||||||
/* 8x 32bit B counters */
|
/* 8x 32bit B counters */
|
||||||
for (i = 0; i < 8; i++) {
|
for (i = 0; i < 8; i++) {
|
||||||
accumulate_uint32(start + 48 + i, end + 48 + i,
|
accumulate_uint32(start + 48 + i, end + 48 + i,
|
||||||
|
@ -1170,15 +1166,15 @@ query_accumulator_offset(const struct intel_perf_query_info *query,
|
||||||
void
|
void
|
||||||
intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result,
|
intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result,
|
||||||
const struct intel_perf_query_info *query,
|
const struct intel_perf_query_info *query,
|
||||||
const struct intel_device_info *devinfo,
|
|
||||||
const void *start,
|
const void *start,
|
||||||
const void *end,
|
const void *end,
|
||||||
bool no_oa_accumulate)
|
bool no_oa_accumulate)
|
||||||
{
|
{
|
||||||
struct intel_perf_query_field_layout *layout = &query->perf->query_layout;
|
const struct intel_perf_query_field_layout *layout = &query->perf->query_layout;
|
||||||
|
const struct intel_device_info *devinfo = &query->perf->devinfo;
|
||||||
|
|
||||||
for (uint32_t r = 0; r < layout->n_fields; r++) {
|
for (uint32_t r = 0; r < layout->n_fields; r++) {
|
||||||
struct intel_perf_query_field *field = &layout->fields[r];
|
const struct intel_perf_query_field *field = &layout->fields[r];
|
||||||
|
|
||||||
if (field->type == INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC) {
|
if (field->type == INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC) {
|
||||||
intel_perf_query_result_read_frequencies(result, devinfo,
|
intel_perf_query_result_read_frequencies(result, devinfo,
|
||||||
|
@ -1189,7 +1185,7 @@ intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result
|
||||||
* unrelated deltas, so don't accumulate the begin/end reports here.
|
* unrelated deltas, so don't accumulate the begin/end reports here.
|
||||||
*/
|
*/
|
||||||
if (!no_oa_accumulate) {
|
if (!no_oa_accumulate) {
|
||||||
intel_perf_query_result_accumulate(result, query, devinfo,
|
intel_perf_query_result_accumulate(result, query,
|
||||||
start + field->location,
|
start + field->location,
|
||||||
end + field->location);
|
end + field->location);
|
||||||
}
|
}
|
||||||
|
@ -1230,7 +1226,6 @@ intel_perf_query_result_clear(struct intel_perf_query_result *result)
|
||||||
|
|
||||||
void
|
void
|
||||||
intel_perf_query_result_print_fields(const struct intel_perf_query_info *query,
|
intel_perf_query_result_print_fields(const struct intel_perf_query_info *query,
|
||||||
const struct intel_device_info *devinfo,
|
|
||||||
const void *data)
|
const void *data)
|
||||||
{
|
{
|
||||||
const struct intel_perf_query_field_layout *layout = &query->perf->query_layout;
|
const struct intel_perf_query_field_layout *layout = &query->perf->query_layout;
|
||||||
|
|
|
@ -35,8 +35,9 @@
|
||||||
#include <sys/mkdev.h>
|
#include <sys/mkdev.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "util/hash_table.h"
|
|
||||||
#include "compiler/glsl/list.h"
|
#include "compiler/glsl/list.h"
|
||||||
|
#include "dev/intel_device_info.h"
|
||||||
|
#include "util/hash_table.h"
|
||||||
#include "util/ralloc.h"
|
#include "util/ralloc.h"
|
||||||
|
|
||||||
#include "drm-uapi/i915_drm.h"
|
#include "drm-uapi/i915_drm.h"
|
||||||
|
@ -45,8 +46,6 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
struct intel_device_info;
|
|
||||||
|
|
||||||
struct intel_perf_config;
|
struct intel_perf_config;
|
||||||
struct intel_perf_query_info;
|
struct intel_perf_query_info;
|
||||||
|
|
||||||
|
@ -334,19 +333,18 @@ struct intel_perf_config {
|
||||||
* All uint64_t for consistent operand types in generated code
|
* All uint64_t for consistent operand types in generated code
|
||||||
*/
|
*/
|
||||||
struct {
|
struct {
|
||||||
uint64_t timestamp_frequency; /** $GpuTimestampFrequency */
|
|
||||||
uint64_t n_eus; /** $EuCoresTotalCount */
|
uint64_t n_eus; /** $EuCoresTotalCount */
|
||||||
uint64_t n_eu_slices; /** $EuSlicesTotalCount */
|
uint64_t n_eu_slices; /** $EuSlicesTotalCount */
|
||||||
uint64_t n_eu_sub_slices; /** $EuSubslicesTotalCount */
|
uint64_t n_eu_sub_slices; /** $EuSubslicesTotalCount */
|
||||||
uint64_t eu_threads_count; /** $EuThreadsCount */
|
|
||||||
uint64_t slice_mask; /** $SliceMask */
|
uint64_t slice_mask; /** $SliceMask */
|
||||||
uint64_t subslice_mask; /** $SubsliceMask */
|
uint64_t subslice_mask; /** $SubsliceMask */
|
||||||
uint64_t gt_min_freq; /** $GpuMinFrequency */
|
uint64_t gt_min_freq; /** $GpuMinFrequency */
|
||||||
uint64_t gt_max_freq; /** $GpuMaxFrequency */
|
uint64_t gt_max_freq; /** $GpuMaxFrequency */
|
||||||
uint64_t revision; /** $SkuRevisionId */
|
|
||||||
bool query_mode; /** $QueryMode */
|
bool query_mode; /** $QueryMode */
|
||||||
} sys_vars;
|
} sys_vars;
|
||||||
|
|
||||||
|
struct intel_device_info devinfo;
|
||||||
|
|
||||||
/* OA metric sets, indexed by GUID, as known by Mesa at build time, to
|
/* OA metric sets, indexed by GUID, as known by Mesa at build time, to
|
||||||
* cross-reference with the GUIDs of configs advertised by the kernel at
|
* cross-reference with the GUIDs of configs advertised by the kernel at
|
||||||
* runtime
|
* runtime
|
||||||
|
@ -455,7 +453,6 @@ void intel_perf_query_result_read_perfcnts(struct intel_perf_query_result *resul
|
||||||
*/
|
*/
|
||||||
void intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
|
void intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
|
||||||
const struct intel_perf_query_info *query,
|
const struct intel_perf_query_info *query,
|
||||||
const struct intel_device_info *devinfo,
|
|
||||||
const uint32_t *start,
|
const uint32_t *start,
|
||||||
const uint32_t *end);
|
const uint32_t *end);
|
||||||
|
|
||||||
|
@ -469,7 +466,6 @@ uint64_t intel_perf_report_timestamp(const struct intel_perf_query_info *query,
|
||||||
*/
|
*/
|
||||||
void intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result,
|
void intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result,
|
||||||
const struct intel_perf_query_info *query,
|
const struct intel_perf_query_info *query,
|
||||||
const struct intel_device_info *devinfo,
|
|
||||||
const void *start,
|
const void *start,
|
||||||
const void *end,
|
const void *end,
|
||||||
bool no_oa_accumulate);
|
bool no_oa_accumulate);
|
||||||
|
@ -479,7 +475,6 @@ void intel_perf_query_result_clear(struct intel_perf_query_result *result);
|
||||||
/** Debug helper printing out query data.
|
/** Debug helper printing out query data.
|
||||||
*/
|
*/
|
||||||
void intel_perf_query_result_print_fields(const struct intel_perf_query_info *query,
|
void intel_perf_query_result_print_fields(const struct intel_perf_query_info *query,
|
||||||
const struct intel_device_info *devinfo,
|
|
||||||
const void *data);
|
const void *data);
|
||||||
|
|
||||||
static inline size_t
|
static inline size_t
|
||||||
|
|
|
@ -1374,7 +1374,6 @@ accumulate_oa_reports(struct intel_perf_context *perf_ctx,
|
||||||
if (add) {
|
if (add) {
|
||||||
intel_perf_query_result_accumulate(&query->oa.result,
|
intel_perf_query_result_accumulate(&query->oa.result,
|
||||||
query->queryinfo,
|
query->queryinfo,
|
||||||
devinfo,
|
|
||||||
last, report);
|
last, report);
|
||||||
} else {
|
} else {
|
||||||
/* We're not adding the delta because we've identified it's not
|
/* We're not adding the delta because we've identified it's not
|
||||||
|
@ -1403,7 +1402,7 @@ accumulate_oa_reports(struct intel_perf_context *perf_ctx,
|
||||||
end:
|
end:
|
||||||
|
|
||||||
intel_perf_query_result_accumulate(&query->oa.result, query->queryinfo,
|
intel_perf_query_result_accumulate(&query->oa.result, query->queryinfo,
|
||||||
devinfo, last, end);
|
last, end);
|
||||||
|
|
||||||
query->oa.results_accumulated = true;
|
query->oa.results_accumulated = true;
|
||||||
drop_from_unaccumulated_query_list(perf_ctx, query);
|
drop_from_unaccumulated_query_list(perf_ctx, query);
|
||||||
|
@ -1574,7 +1573,6 @@ intel_perf_get_query_data(struct intel_perf_context *perf_ctx,
|
||||||
uint32_t *end_report = query->oa.map + perf_cfg->query_layout.size;
|
uint32_t *end_report = query->oa.map + perf_cfg->query_layout.size;
|
||||||
intel_perf_query_result_accumulate_fields(&query->oa.result,
|
intel_perf_query_result_accumulate_fields(&query->oa.result,
|
||||||
query->queryinfo,
|
query->queryinfo,
|
||||||
perf_ctx->devinfo,
|
|
||||||
begin_report,
|
begin_report,
|
||||||
end_report,
|
end_report,
|
||||||
true /* no_oa_accumulate */);
|
true /* no_oa_accumulate */);
|
||||||
|
|
|
@ -562,7 +562,7 @@ VkResult genX(GetQueryPoolResults)(
|
||||||
const struct intel_perf_query_info *query = pool->pass_query[p];
|
const struct intel_perf_query_info *query = pool->pass_query[p];
|
||||||
struct intel_perf_query_result result;
|
struct intel_perf_query_result result;
|
||||||
intel_perf_query_result_clear(&result);
|
intel_perf_query_result_clear(&result);
|
||||||
intel_perf_query_result_accumulate_fields(&result, query, &device->info,
|
intel_perf_query_result_accumulate_fields(&result, query,
|
||||||
pool->bo->map + khr_perf_query_data_offset(pool, firstQuery + i, p, false),
|
pool->bo->map + khr_perf_query_data_offset(pool, firstQuery + i, p, false),
|
||||||
pool->bo->map + khr_perf_query_data_offset(pool, firstQuery + i, p, true),
|
pool->bo->map + khr_perf_query_data_offset(pool, firstQuery + i, p, true),
|
||||||
false /* no_oa_accumulate */);
|
false /* no_oa_accumulate */);
|
||||||
|
@ -579,7 +579,7 @@ VkResult genX(GetQueryPoolResults)(
|
||||||
const struct intel_perf_query_info *query = &device->physical->perf->queries[0];
|
const struct intel_perf_query_info *query = &device->physical->perf->queries[0];
|
||||||
struct intel_perf_query_result result;
|
struct intel_perf_query_result result;
|
||||||
intel_perf_query_result_clear(&result);
|
intel_perf_query_result_clear(&result);
|
||||||
intel_perf_query_result_accumulate_fields(&result, query, &device->info,
|
intel_perf_query_result_accumulate_fields(&result, query,
|
||||||
query_data + intel_perf_query_data_offset(pool, false),
|
query_data + intel_perf_query_data_offset(pool, false),
|
||||||
query_data + intel_perf_query_data_offset(pool, true),
|
query_data + intel_perf_query_data_offset(pool, true),
|
||||||
false /* no_oa_accumulate */);
|
false /* no_oa_accumulate */);
|
||||||
|
|
Loading…
Reference in New Issue