diff --git a/src/intel/perf/gen_perf.py b/src/intel/perf/gen_perf.py
index 916fa2728ef..30cf68379c5 100644
--- a/src/intel/perf/gen_perf.py
+++ b/src/intel/perf/gen_perf.py
@@ -212,16 +212,16 @@ hw_vars = {}
 hw_vars["$EuCoresTotalCount"] = "perf->sys_vars.n_eus"
 hw_vars["$EuSlicesTotalCount"] = "perf->sys_vars.n_eu_slices"
 hw_vars["$EuSubslicesTotalCount"] = "perf->sys_vars.n_eu_sub_slices"
-hw_vars["$EuThreadsCount"] = "perf->sys_vars.eu_threads_count"
+hw_vars["$EuThreadsCount"] = "perf->devinfo.num_thread_per_eu"
 hw_vars["$SliceMask"] = "perf->sys_vars.slice_mask"
 # subslice_mask is interchangeable with subslice/dual-subslice since Gfx12+
 # only has dual subslices which can be assimilated with 16EUs subslices.
 hw_vars["$SubsliceMask"] = "perf->sys_vars.subslice_mask"
 hw_vars["$DualSubsliceMask"] = "perf->sys_vars.subslice_mask"
-hw_vars["$GpuTimestampFrequency"] = "perf->sys_vars.timestamp_frequency"
+hw_vars["$GpuTimestampFrequency"] = "perf->devinfo.timestamp_frequency"
 hw_vars["$GpuMinFrequency"] = "perf->sys_vars.gt_min_freq"
 hw_vars["$GpuMaxFrequency"] = "perf->sys_vars.gt_max_freq"
-hw_vars["$SkuRevisionId"] = "perf->sys_vars.revision"
+hw_vars["$SkuRevisionId"] = "perf->devinfo.revision"
 hw_vars["$QueryMode"] = "perf->sys_vars.query_mode"
 
 def output_rpn_equation_code(set, counter, equation):
diff --git a/src/intel/perf/intel_perf.c b/src/intel/perf/intel_perf.c
index f750e673fd3..a377a241fc6 100644
--- a/src/intel/perf/intel_perf.c
+++ b/src/intel/perf/intel_perf.c
@@ -347,9 +347,10 @@ init_oa_configs(struct intel_perf_config *perf, int fd,
 }
 
 static void
-compute_topology_builtins(struct intel_perf_config *perf,
-                          const struct intel_device_info *devinfo)
+compute_topology_builtins(struct intel_perf_config *perf)
 {
+   const struct intel_device_info *devinfo = &perf->devinfo;
+
    perf->sys_vars.slice_mask = devinfo->slice_masks;
    perf->sys_vars.n_eu_slices = devinfo->num_slices;
 
@@ -361,8 +362,6 @@ compute_topology_builtins(struct intel_perf_config *perf,
    for (int i = 0; i < sizeof(devinfo->eu_masks); i++)
       perf->sys_vars.n_eus += util_bitcount(devinfo->eu_masks[i]);
 
-   perf->sys_vars.eu_threads_count = devinfo->num_thread_per_eu;
-
    /* The subslice mask builtin contains bits for all slices. Prior to Gfx11
     * it had groups of 3bits for each slice, on Gfx11 and above it's 8bits for
     * each slice.
@@ -384,7 +383,6 @@ compute_topology_builtins(struct intel_perf_config *perf,
 
 static bool
 init_oa_sys_vars(struct intel_perf_config *perf,
-                 const struct intel_device_info *devinfo,
                  bool use_register_snapshots)
 {
    uint64_t min_freq_mhz = 0, max_freq_mhz = 0;
@@ -403,10 +401,8 @@ init_oa_sys_vars(struct intel_perf_config *perf,
    memset(&perf->sys_vars, 0, sizeof(perf->sys_vars));
    perf->sys_vars.gt_min_freq = min_freq_mhz * 1000000;
    perf->sys_vars.gt_max_freq = max_freq_mhz * 1000000;
-   perf->sys_vars.timestamp_frequency = devinfo->timestamp_frequency;
-   perf->sys_vars.revision = devinfo->revision;
    perf->sys_vars.query_mode = use_register_snapshots;
-   compute_topology_builtins(perf, devinfo);
+   compute_topology_builtins(perf);
 
    return true;
 }
@@ -700,6 +696,7 @@ oa_metrics_available(struct intel_perf_config *perf, int fd,
    bool i915_perf_oa_available = false;
    struct stat sb;
 
+   perf->devinfo = *devinfo;
    perf->i915_query_supported = i915_query_perf_config_supported(perf, fd);
    perf->i915_perf_version = i915_perf_version(fd);
 
@@ -731,7 +728,7 @@ oa_metrics_available(struct intel_perf_config *perf, int fd,
    return i915_perf_oa_available &&
          oa_register &&
          get_sysfs_dev_dir(perf, fd) &&
-         init_oa_sys_vars(perf, devinfo, use_register_snapshots);
+         init_oa_sys_vars(perf, use_register_snapshots);
 }
 
 static void
@@ -1037,7 +1034,6 @@ intel_perf_report_timestamp(const struct intel_perf_query_info *query,
 void
 intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
                                    const struct intel_perf_query_info *query,
-                                   const struct intel_device_info *devinfo,
                                    const uint32_t *start,
                                    const uint32_t *end)
 {
@@ -1072,7 +1068,7 @@ intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
                         result->accumulator + query->a_offset + 32 + i);
    }
 
-   if (can_use_mi_rpc_bc_counters(devinfo)) {
+   if (can_use_mi_rpc_bc_counters(&query->perf->devinfo)) {
       /* 8x 32bit B counters */
       for (i = 0; i < 8; i++) {
          accumulate_uint32(start + 48 + i, end + 48 + i,
@@ -1170,15 +1166,15 @@ query_accumulator_offset(const struct intel_perf_query_info *query,
 void
 intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result,
                                           const struct intel_perf_query_info *query,
-                                          const struct intel_device_info *devinfo,
                                           const void *start,
                                           const void *end,
                                           bool no_oa_accumulate)
 {
-   struct intel_perf_query_field_layout *layout = &query->perf->query_layout;
+   const struct intel_perf_query_field_layout *layout = &query->perf->query_layout;
+   const struct intel_device_info *devinfo = &query->perf->devinfo;
 
    for (uint32_t r = 0; r < layout->n_fields; r++) {
-      struct intel_perf_query_field *field = &layout->fields[r];
+      const struct intel_perf_query_field *field = &layout->fields[r];
 
       if (field->type == INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC) {
         intel_perf_query_result_read_frequencies(result, devinfo,
@@ -1189,7 +1185,7 @@ intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result
          * unrelated deltas, so don't accumulate the begin/end reports here.
          */
         if (!no_oa_accumulate) {
-            intel_perf_query_result_accumulate(result, query, devinfo,
+            intel_perf_query_result_accumulate(result, query,
                                                start + field->location,
                                                end + field->location);
         }
@@ -1230,7 +1226,6 @@ intel_perf_query_result_clear(struct intel_perf_query_result *result)
 
 void
 intel_perf_query_result_print_fields(const struct intel_perf_query_info *query,
-                                     const struct intel_device_info *devinfo,
                                      const void *data)
 {
    const struct intel_perf_query_field_layout *layout = &query->perf->query_layout;
diff --git a/src/intel/perf/intel_perf.h b/src/intel/perf/intel_perf.h
index 4d99d194de2..194be876417 100644
--- a/src/intel/perf/intel_perf.h
+++ b/src/intel/perf/intel_perf.h
@@ -35,8 +35,9 @@
 #include
 #endif
 
-#include "util/hash_table.h"
 #include "compiler/glsl/list.h"
+#include "dev/intel_device_info.h"
+#include "util/hash_table.h"
 #include "util/ralloc.h"
 
 #include "drm-uapi/i915_drm.h"
@@ -45,8 +46,6 @@ extern "C" {
 #endif
 
-struct intel_device_info;
-
 struct intel_perf_config;
 struct intel_perf_query_info;
 
@@ -334,19 +333,18 @@ struct intel_perf_config {
     * All uint64_t for consistent operand types in generated code
     */
    struct {
-      uint64_t timestamp_frequency; /** $GpuTimestampFrequency */
       uint64_t n_eus;               /** $EuCoresTotalCount */
       uint64_t n_eu_slices;         /** $EuSlicesTotalCount */
       uint64_t n_eu_sub_slices;     /** $EuSubslicesTotalCount */
-      uint64_t eu_threads_count;    /** $EuThreadsCount */
       uint64_t slice_mask;          /** $SliceMask */
       uint64_t subslice_mask;       /** $SubsliceMask */
       uint64_t gt_min_freq;         /** $GpuMinFrequency */
       uint64_t gt_max_freq;         /** $GpuMaxFrequency */
-      uint64_t revision;            /** $SkuRevisionId */
       bool query_mode;              /** $QueryMode */
    } sys_vars;
 
+   struct intel_device_info devinfo;
+
    /* OA metric sets, indexed by GUID, as know by Mesa at build time, to
    * cross-reference with the GUIDs of configs advertised by the kernel at
    * runtime
@@ -455,7 +453,6 @@ void intel_perf_query_result_read_perfcnts(struct intel_perf_query_result *resul
  */
 void intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
                                         const struct intel_perf_query_info *query,
-                                        const struct intel_device_info *devinfo,
                                         const uint32_t *start,
                                         const uint32_t *end);
 
@@ -469,7 +466,6 @@ uint64_t intel_perf_report_timestamp(const struct intel_perf_query_info *query,
  */
 void intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result,
                                                const struct intel_perf_query_info *query,
-                                               const struct intel_device_info *devinfo,
                                                const void *start,
                                                const void *end,
                                                bool no_oa_accumulate);
 
@@ -479,7 +475,6 @@ void intel_perf_query_result_clear(struct intel_perf_query_result *result);
 /** Debug helper printing out query data.
  */
 void intel_perf_query_result_print_fields(const struct intel_perf_query_info *query,
-                                          const struct intel_device_info *devinfo,
                                           const void *data);
 
 static inline size_t
diff --git a/src/intel/perf/intel_perf_query.c b/src/intel/perf/intel_perf_query.c
index 65fadc026a9..0066de8159c 100644
--- a/src/intel/perf/intel_perf_query.c
+++ b/src/intel/perf/intel_perf_query.c
@@ -1374,7 +1374,6 @@ accumulate_oa_reports(struct intel_perf_context *perf_ctx,
       if (add) {
          intel_perf_query_result_accumulate(&query->oa.result,
                                             query->queryinfo,
-                                            devinfo,
                                             last, report);
       } else {
          /* We're not adding the delta because we've identified it's not
@@ -1403,7 +1402,7 @@ accumulate_oa_reports(struct intel_perf_context *perf_ctx,
 end:
 
    intel_perf_query_result_accumulate(&query->oa.result, query->queryinfo,
-                                      devinfo, last, end);
+                                      last, end);
 
    query->oa.results_accumulated = true;
    drop_from_unaccumulated_query_list(perf_ctx, query);
@@ -1574,7 +1573,6 @@ intel_perf_get_query_data(struct intel_perf_context *perf_ctx,
          uint32_t *end_report = query->oa.map + perf_cfg->query_layout.size;
          intel_perf_query_result_accumulate_fields(&query->oa.result,
                                                    query->queryinfo,
-                                                   perf_ctx->devinfo,
                                                    begin_report,
                                                    end_report,
                                                    true /* no_oa_accumulate */);
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index ef2520b6853..e302c4fe163 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -562,7 +562,7 @@ VkResult genX(GetQueryPoolResults)(
          const struct intel_perf_query_info *query = pool->pass_query[p];
          struct intel_perf_query_result result;
          intel_perf_query_result_clear(&result);
-         intel_perf_query_result_accumulate_fields(&result, query, &device->info,
+         intel_perf_query_result_accumulate_fields(&result, query,
                                                    pool->bo->map + khr_perf_query_data_offset(pool, firstQuery + i, p, false),
                                                    pool->bo->map + khr_perf_query_data_offset(pool, firstQuery + i, p, true),
                                                    false /* no_oa_accumulate */);
@@ -579,7 +579,7 @@ VkResult genX(GetQueryPoolResults)(
          const struct intel_perf_query_info *query = &device->physical->perf->queries[0];
          struct intel_perf_query_result result;
          intel_perf_query_result_clear(&result);
-         intel_perf_query_result_accumulate_fields(&result, query, &device->info,
+         intel_perf_query_result_accumulate_fields(&result, query,
                                                    query_data + intel_perf_query_data_offset(pool, false),
                                                    query_data + intel_perf_query_data_offset(pool, true),
                                                    false /* no_oa_accumulate */);
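
Note (not part of the patch): with this change none of the accumulation/print helpers take a struct intel_device_info parameter anymore; oa_metrics_available() snapshots the device info into the config (perf->devinfo = *devinfo) and the helpers reach it through query->perf->devinfo. A minimal caller sketch of the new calling convention follows; accumulate_pass_sketch, begin_report and end_report are hypothetical names, and the include path is an assumption about how callers pull in the header.

/* Sketch: accumulate a begin/end snapshot pair with the updated signature.
 * begin_report/end_report are hypothetical pointers into the query's
 * mapped data; the devinfo argument is gone because the helper reads
 * query->perf->devinfo internally.
 */
#include "perf/intel_perf.h"

static void
accumulate_pass_sketch(const struct intel_perf_query_info *query,
                       const void *begin_report, const void *end_report)
{
   struct intel_perf_query_result result;

   intel_perf_query_result_clear(&result);
   intel_perf_query_result_accumulate_fields(&result, query,
                                             begin_report, end_report,
                                             false /* no_oa_accumulate */);
}

This is the same pattern the genX_query.c hunks above end up with, just without the explicit &device->info argument.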