intel/perf: store a copy of devinfo

In the future we'll pull more information off devinfo.

v2: Constify pointers (Ian)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Marcin Ślusarz <marcin.slusarz@intel.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16144>
This commit is contained in:
Lionel Landwerlin 2021-06-23 17:47:29 +03:00 committed by Marge Bot
parent 0df4b96062
commit efc2782f97
5 changed files with 21 additions and 33 deletions

View File

@ -212,16 +212,16 @@ hw_vars = {}
hw_vars["$EuCoresTotalCount"] = "perf->sys_vars.n_eus"
hw_vars["$EuSlicesTotalCount"] = "perf->sys_vars.n_eu_slices"
hw_vars["$EuSubslicesTotalCount"] = "perf->sys_vars.n_eu_sub_slices"
hw_vars["$EuThreadsCount"] = "perf->sys_vars.eu_threads_count"
hw_vars["$EuThreadsCount"] = "perf->devinfo.num_thread_per_eu"
hw_vars["$SliceMask"] = "perf->sys_vars.slice_mask"
# subslice_mask is interchangeable with subslice/dual-subslice since Gfx12+
# only has dual subslices which can be assimilated with 16EUs subslices.
hw_vars["$SubsliceMask"] = "perf->sys_vars.subslice_mask"
hw_vars["$DualSubsliceMask"] = "perf->sys_vars.subslice_mask"
hw_vars["$GpuTimestampFrequency"] = "perf->sys_vars.timestamp_frequency"
hw_vars["$GpuTimestampFrequency"] = "perf->devinfo.timestamp_frequency"
hw_vars["$GpuMinFrequency"] = "perf->sys_vars.gt_min_freq"
hw_vars["$GpuMaxFrequency"] = "perf->sys_vars.gt_max_freq"
hw_vars["$SkuRevisionId"] = "perf->sys_vars.revision"
hw_vars["$SkuRevisionId"] = "perf->devinfo.revision"
hw_vars["$QueryMode"] = "perf->sys_vars.query_mode"
def output_rpn_equation_code(set, counter, equation):

View File

@ -347,9 +347,10 @@ init_oa_configs(struct intel_perf_config *perf, int fd,
}
static void
compute_topology_builtins(struct intel_perf_config *perf,
const struct intel_device_info *devinfo)
compute_topology_builtins(struct intel_perf_config *perf)
{
const struct intel_device_info *devinfo = &perf->devinfo;
perf->sys_vars.slice_mask = devinfo->slice_masks;
perf->sys_vars.n_eu_slices = devinfo->num_slices;
@ -361,8 +362,6 @@ compute_topology_builtins(struct intel_perf_config *perf,
for (int i = 0; i < sizeof(devinfo->eu_masks); i++)
perf->sys_vars.n_eus += util_bitcount(devinfo->eu_masks[i]);
perf->sys_vars.eu_threads_count = devinfo->num_thread_per_eu;
/* The subslice mask builtin contains bits for all slices. Prior to Gfx11
* it had groups of 3bits for each slice, on Gfx11 and above it's 8bits for
* each slice.
@ -384,7 +383,6 @@ compute_topology_builtins(struct intel_perf_config *perf,
static bool
init_oa_sys_vars(struct intel_perf_config *perf,
const struct intel_device_info *devinfo,
bool use_register_snapshots)
{
uint64_t min_freq_mhz = 0, max_freq_mhz = 0;
@ -403,10 +401,8 @@ init_oa_sys_vars(struct intel_perf_config *perf,
memset(&perf->sys_vars, 0, sizeof(perf->sys_vars));
perf->sys_vars.gt_min_freq = min_freq_mhz * 1000000;
perf->sys_vars.gt_max_freq = max_freq_mhz * 1000000;
perf->sys_vars.timestamp_frequency = devinfo->timestamp_frequency;
perf->sys_vars.revision = devinfo->revision;
perf->sys_vars.query_mode = use_register_snapshots;
compute_topology_builtins(perf, devinfo);
compute_topology_builtins(perf);
return true;
}
@ -700,6 +696,7 @@ oa_metrics_available(struct intel_perf_config *perf, int fd,
bool i915_perf_oa_available = false;
struct stat sb;
perf->devinfo = *devinfo;
perf->i915_query_supported = i915_query_perf_config_supported(perf, fd);
perf->i915_perf_version = i915_perf_version(fd);
@ -731,7 +728,7 @@ oa_metrics_available(struct intel_perf_config *perf, int fd,
return i915_perf_oa_available &&
oa_register &&
get_sysfs_dev_dir(perf, fd) &&
init_oa_sys_vars(perf, devinfo, use_register_snapshots);
init_oa_sys_vars(perf, use_register_snapshots);
}
static void
@ -1037,7 +1034,6 @@ intel_perf_report_timestamp(const struct intel_perf_query_info *query,
void
intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
const struct intel_perf_query_info *query,
const struct intel_device_info *devinfo,
const uint32_t *start,
const uint32_t *end)
{
@ -1072,7 +1068,7 @@ intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
result->accumulator + query->a_offset + 32 + i);
}
if (can_use_mi_rpc_bc_counters(devinfo)) {
if (can_use_mi_rpc_bc_counters(&query->perf->devinfo)) {
/* 8x 32bit B counters */
for (i = 0; i < 8; i++) {
accumulate_uint32(start + 48 + i, end + 48 + i,
@ -1170,15 +1166,15 @@ query_accumulator_offset(const struct intel_perf_query_info *query,
void
intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result,
const struct intel_perf_query_info *query,
const struct intel_device_info *devinfo,
const void *start,
const void *end,
bool no_oa_accumulate)
{
struct intel_perf_query_field_layout *layout = &query->perf->query_layout;
const struct intel_perf_query_field_layout *layout = &query->perf->query_layout;
const struct intel_device_info *devinfo = &query->perf->devinfo;
for (uint32_t r = 0; r < layout->n_fields; r++) {
struct intel_perf_query_field *field = &layout->fields[r];
const struct intel_perf_query_field *field = &layout->fields[r];
if (field->type == INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC) {
intel_perf_query_result_read_frequencies(result, devinfo,
@ -1189,7 +1185,7 @@ intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result
* unrelated deltas, so don't accumulate the begin/end reports here.
*/
if (!no_oa_accumulate) {
intel_perf_query_result_accumulate(result, query, devinfo,
intel_perf_query_result_accumulate(result, query,
start + field->location,
end + field->location);
}
@ -1230,7 +1226,6 @@ intel_perf_query_result_clear(struct intel_perf_query_result *result)
void
intel_perf_query_result_print_fields(const struct intel_perf_query_info *query,
const struct intel_device_info *devinfo,
const void *data)
{
const struct intel_perf_query_field_layout *layout = &query->perf->query_layout;

View File

@ -35,8 +35,9 @@
#include <sys/mkdev.h>
#endif
#include "util/hash_table.h"
#include "compiler/glsl/list.h"
#include "dev/intel_device_info.h"
#include "util/hash_table.h"
#include "util/ralloc.h"
#include "drm-uapi/i915_drm.h"
@ -45,8 +46,6 @@
extern "C" {
#endif
struct intel_device_info;
struct intel_perf_config;
struct intel_perf_query_info;
@ -334,19 +333,18 @@ struct intel_perf_config {
* All uint64_t for consistent operand types in generated code
*/
struct {
uint64_t timestamp_frequency; /** $GpuTimestampFrequency */
uint64_t n_eus; /** $EuCoresTotalCount */
uint64_t n_eu_slices; /** $EuSlicesTotalCount */
uint64_t n_eu_sub_slices; /** $EuSubslicesTotalCount */
uint64_t eu_threads_count; /** $EuThreadsCount */
uint64_t slice_mask; /** $SliceMask */
uint64_t subslice_mask; /** $SubsliceMask */
uint64_t gt_min_freq; /** $GpuMinFrequency */
uint64_t gt_max_freq; /** $GpuMaxFrequency */
uint64_t revision; /** $SkuRevisionId */
bool query_mode; /** $QueryMode */
} sys_vars;
struct intel_device_info devinfo;
/* OA metric sets, indexed by GUID, as known by Mesa at build time, to
* cross-reference with the GUIDs of configs advertised by the kernel at
* runtime
@ -455,7 +453,6 @@ void intel_perf_query_result_read_perfcnts(struct intel_perf_query_result *resul
*/
void intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
const struct intel_perf_query_info *query,
const struct intel_device_info *devinfo,
const uint32_t *start,
const uint32_t *end);
@ -469,7 +466,6 @@ uint64_t intel_perf_report_timestamp(const struct intel_perf_query_info *query,
*/
void intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result,
const struct intel_perf_query_info *query,
const struct intel_device_info *devinfo,
const void *start,
const void *end,
bool no_oa_accumulate);
@ -479,7 +475,6 @@ void intel_perf_query_result_clear(struct intel_perf_query_result *result);
/** Debug helper printing out query data.
*/
void intel_perf_query_result_print_fields(const struct intel_perf_query_info *query,
const struct intel_device_info *devinfo,
const void *data);
static inline size_t

View File

@ -1374,7 +1374,6 @@ accumulate_oa_reports(struct intel_perf_context *perf_ctx,
if (add) {
intel_perf_query_result_accumulate(&query->oa.result,
query->queryinfo,
devinfo,
last, report);
} else {
/* We're not adding the delta because we've identified it's not
@ -1403,7 +1402,7 @@ accumulate_oa_reports(struct intel_perf_context *perf_ctx,
end:
intel_perf_query_result_accumulate(&query->oa.result, query->queryinfo,
devinfo, last, end);
last, end);
query->oa.results_accumulated = true;
drop_from_unaccumulated_query_list(perf_ctx, query);
@ -1574,7 +1573,6 @@ intel_perf_get_query_data(struct intel_perf_context *perf_ctx,
uint32_t *end_report = query->oa.map + perf_cfg->query_layout.size;
intel_perf_query_result_accumulate_fields(&query->oa.result,
query->queryinfo,
perf_ctx->devinfo,
begin_report,
end_report,
true /* no_oa_accumulate */);

View File

@ -562,7 +562,7 @@ VkResult genX(GetQueryPoolResults)(
const struct intel_perf_query_info *query = pool->pass_query[p];
struct intel_perf_query_result result;
intel_perf_query_result_clear(&result);
intel_perf_query_result_accumulate_fields(&result, query, &device->info,
intel_perf_query_result_accumulate_fields(&result, query,
pool->bo->map + khr_perf_query_data_offset(pool, firstQuery + i, p, false),
pool->bo->map + khr_perf_query_data_offset(pool, firstQuery + i, p, true),
false /* no_oa_accumulate */);
@ -579,7 +579,7 @@ VkResult genX(GetQueryPoolResults)(
const struct intel_perf_query_info *query = &device->physical->perf->queries[0];
struct intel_perf_query_result result;
intel_perf_query_result_clear(&result);
intel_perf_query_result_accumulate_fields(&result, query, &device->info,
intel_perf_query_result_accumulate_fields(&result, query,
query_data + intel_perf_query_data_offset(pool, false),
query_data + intel_perf_query_data_offset(pool, true),
false /* no_oa_accumulate */);