From 73188a4590a8d407e5dbc181d3d61a82f92a80e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 30 Apr 2024 12:30:44 -0700 Subject: [PATCH] intel/perf: Add function to open perf stream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will make it easy to add Xe KMD support and reduce code duplication. No changes in behavior are expected here. Reviewed-by: Lionel Landwerlin Signed-off-by: José Roberto de Souza Part-of: --- src/intel/perf/i915/intel_perf.c | 61 +++++++++++++++++++++++++++++++ src/intel/perf/i915/intel_perf.h | 6 +++ src/intel/perf/intel_perf.c | 19 ++++++++++ src/intel/perf/intel_perf.h | 5 +++ src/intel/perf/intel_perf_query.c | 50 ++----------------------- src/intel/vulkan/anv_perf.c | 47 ++---------------------- src/intel/vulkan_hasvk/anv_perf.c | 46 ++--------------------- 7 files changed, 102 insertions(+), 132 deletions(-) diff --git a/src/intel/perf/i915/intel_perf.c b/src/intel/perf/i915/intel_perf.c index 1482b5edef0bf..7e41c76c811d3 100644 --- a/src/intel/perf/i915/intel_perf.c +++ b/src/intel/perf/i915/intel_perf.c @@ -5,6 +5,7 @@ #include "perf/i915/intel_perf.h" +#include "common/intel_gem.h" #include "perf/intel_perf.h" #include "drm-uapi/i915_drm.h" @@ -18,3 +19,63 @@ uint64_t i915_perf_get_oa_format(struct intel_perf_config *perf) else return I915_OA_FORMAT_A24u40_A14u32_B8_C8; } + +int +i915_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd, + uint32_t ctx_id, uint64_t metrics_set_id, + uint64_t report_format, uint64_t period_exponent, + bool hold_preemption, bool enable) +{ + uint64_t properties[DRM_I915_PERF_PROP_MAX * 2]; + uint32_t p = 0; + + /* Single context sampling if valid context id. 
*/ + if (ctx_id != INTEL_PERF_INVALID_CTX_ID) { + properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE; + properties[p++] = ctx_id; + } + + /* Include OA reports in samples */ + properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA; + properties[p++] = true; + + /* OA unit configuration */ + properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET; + properties[p++] = metrics_set_id; + + properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT; + properties[p++] = report_format; + + properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT; + properties[p++] = period_exponent; + + if (hold_preemption) { + properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION; + properties[p++] = true; + } + + /* If global SSEU is available, pin it to the default. This will ensure on + * Gfx11 for instance we use the full EU array. Initially when perf was + * enabled we would use only half on Gfx11 because of functional + * requirements. + * + * Not supported on Gfx12.5+. + */ + if (intel_perf_has_global_sseu(perf_config) && + perf_config->devinfo->verx10 < 125) { + properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU; + properties[p++] = (uintptr_t) &perf_config->sseu; + } + + assert(p <= ARRAY_SIZE(properties)); + + struct drm_i915_perf_open_param param = { + .flags = I915_PERF_FLAG_FD_CLOEXEC | + I915_PERF_FLAG_FD_NONBLOCK | + (enable ? 0 : I915_PERF_FLAG_DISABLED), + .num_properties = p / 2, + .properties_ptr = (uintptr_t) properties, + }; + int fd = intel_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param); + return fd > -1 ? 
fd : 0; +} diff --git a/src/intel/perf/i915/intel_perf.h b/src/intel/perf/i915/intel_perf.h index aaac090ca3728..ac338a61663af 100644 --- a/src/intel/perf/i915/intel_perf.h +++ b/src/intel/perf/i915/intel_perf.h @@ -5,8 +5,14 @@ #pragma once +#include <stdbool.h> #include <stdint.h> struct intel_perf_config; uint64_t i915_perf_get_oa_format(struct intel_perf_config *perf); + +int i915_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd, + uint32_t ctx_id, uint64_t metrics_set_id, + uint64_t report_format, uint64_t period_exponent, + bool hold_preemption, bool enable); diff --git a/src/intel/perf/intel_perf.c b/src/intel/perf/intel_perf.c index 9dff7b7845c64..7636ea2645d22 100644 --- a/src/intel/perf/intel_perf.c +++ b/src/intel/perf/intel_perf.c @@ -1580,3 +1580,22 @@ intel_perf_get_oa_format(struct intel_perf_config *perf_cfg) return 0; } } + +int +intel_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd, + uint32_t ctx_id, uint64_t metrics_set_id, + uint64_t period_exponent, bool hold_preemption, + bool enable) +{ + uint64_t report_format = intel_perf_get_oa_format(perf_config); + + switch (perf_config->devinfo->kmd_type) { + case INTEL_KMD_TYPE_I915: + return i915_perf_stream_open(perf_config, drm_fd, ctx_id, metrics_set_id, + report_format, period_exponent, + hold_preemption, enable); + default: + unreachable("missing"); + return 0; + } +} diff --git a/src/intel/perf/intel_perf.h b/src/intel/perf/intel_perf.h index 549fcc4c28231..c0e95594f1561 100644 --- a/src/intel/perf/intel_perf.h +++ b/src/intel/perf/intel_perf.h @@ -565,6 +565,11 @@ void intel_perf_get_counters_passes(struct intel_perf_config *perf, uint32_t counter_indices_count, struct intel_perf_counter_pass *counter_pass); +int intel_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd, + uint32_t ctx_id, uint64_t metrics_set_id, + uint64_t period_exponent, bool hold_preemption, + bool enable); + #ifdef __cplusplus } // extern "C" #endif diff --git 
a/src/intel/perf/intel_perf_query.c b/src/intel/perf/intel_perf_query.c index 34c4acd75c39c..5b094fd851982 100644 --- a/src/intel/perf/intel_perf_query.c +++ b/src/intel/perf/intel_perf_query.c @@ -357,53 +357,9 @@ intel_perf_open(struct intel_perf_context *perf_ctx, uint32_t ctx_id, bool enable) { - uint64_t properties[DRM_I915_PERF_PROP_MAX * 2]; - uint32_t p = 0; - - /* Single context sampling if valid context id. */ - if (ctx_id != INTEL_PERF_INVALID_CTX_ID) { - properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE; - properties[p++] = ctx_id; - } - - /* Include OA reports in samples */ - properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA; - properties[p++] = true; - - /* OA unit configuration */ - properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET; - properties[p++] = metrics_set_id; - - properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT; - properties[p++] = report_format; - - properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT; - properties[p++] = period_exponent; - - /* If global SSEU is available, pin it to the default. This will ensure on - * Gfx11 for instance we use the full EU array. Initially when perf was - * enabled we would use only half on Gfx11 because of functional - * requirements. - * - * Temporary disable this option on Gfx12.5+, kernel doesn't appear to - * support it. - */ - if (intel_perf_has_global_sseu(perf_ctx->perf) && - perf_ctx->devinfo->verx10 < 125) { - properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU; - properties[p++] = to_user_pointer(&perf_ctx->perf->sseu); - } - - assert(p <= ARRAY_SIZE(properties)); - - struct drm_i915_perf_open_param param = { - .flags = I915_PERF_FLAG_FD_CLOEXEC | - I915_PERF_FLAG_FD_NONBLOCK | - (enable ? 
0 : I915_PERF_FLAG_DISABLED), - .num_properties = p / 2, - .properties_ptr = (uintptr_t) properties, - }; - int fd = intel_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param); + int fd = intel_perf_stream_open(perf_ctx->perf, drm_fd, ctx_id, + metrics_set_id, period_exponent, false, + enable); if (fd == -1) { DBG("Error opening gen perf OA stream: %m\n"); return false; diff --git a/src/intel/vulkan/anv_perf.c b/src/intel/vulkan/anv_perf.c index b706f0a0d8627..0e9a959c87276 100644 --- a/src/intel/vulkan/anv_perf.c +++ b/src/intel/vulkan/anv_perf.c @@ -96,50 +96,11 @@ anv_device_perf_init(struct anv_device *device) static int anv_device_perf_open(struct anv_device *device, uint64_t metric_id) { - uint64_t properties[DRM_I915_PERF_PROP_MAX * 2]; - struct drm_i915_perf_open_param param; - int p = 0, stream_fd; + uint64_t period_exponent = 31; /* slowest sampling period */ - properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA; - properties[p++] = true; - - properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET; - properties[p++] = metric_id; - - properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT; - properties[p++] = intel_perf_get_oa_format(device->physical->perf); - - properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT; - properties[p++] = 31; /* slowest sampling period */ - - properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE; - properties[p++] = device->context_id; - - properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION; - properties[p++] = true; - - /* If global SSEU is available, pin it to the default. This will ensure on - * Gfx11 for instance we use the full EU array. Initially when perf was - * enabled we would use only half on Gfx11 because of functional - * requirements. - * - * Temporary disable this option on Gfx12.5+, kernel doesn't appear to - * support it. 
- */ - if (intel_perf_has_global_sseu(device->physical->perf) && - device->info->verx10 < 125) { - properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU; - properties[p++] = (uintptr_t) &device->physical->perf->sseu; - } - - memset(&param, 0, sizeof(param)); - param.flags = 0; - param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK; - param.properties_ptr = (uintptr_t)properties; - param.num_properties = p / 2; - - stream_fd = intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, &param); - return stream_fd; + return intel_perf_stream_open(device->physical->perf, device->fd, + device->context_id, metric_id, + period_exponent, true, true); } /* VK_INTEL_performance_query */ diff --git a/src/intel/vulkan_hasvk/anv_perf.c b/src/intel/vulkan_hasvk/anv_perf.c index a18b340884aff..4fae7d36ce47a 100644 --- a/src/intel/vulkan_hasvk/anv_perf.c +++ b/src/intel/vulkan_hasvk/anv_perf.c @@ -104,49 +104,11 @@ anv_device_perf_init(struct anv_device *device) static int anv_device_perf_open(struct anv_device *device, uint64_t metric_id) { - uint64_t properties[DRM_I915_PERF_PROP_MAX * 2]; - struct drm_i915_perf_open_param param; - int p = 0, stream_fd; + uint64_t period_exponent = 31; /* slowest sampling period */ - properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA; - properties[p++] = true; - - properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET; - properties[p++] = metric_id; - - properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT; - properties[p++] = intel_perf_get_oa_format(device->physical->perf); - - properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT; - properties[p++] = 31; /* slowest sampling period */ - - properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE; - properties[p++] = device->context_id; - - properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION; - properties[p++] = true; - - /* If global SSEU is available, pin it to the default. This will ensure on - * Gfx11 for instance we use the full EU array. 
Initially when perf was - * enabled we would use only half on Gfx11 because of functional - * requirements. - * - * Temporary disable this option on Gfx12.5+, kernel doesn't appear to - * support it. - */ - if (intel_perf_has_global_sseu(device->physical->perf)) { - properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU; - properties[p++] = (uintptr_t) &device->physical->perf->sseu; - } - - memset(&param, 0, sizeof(param)); - param.flags = 0; - param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK; - param.properties_ptr = (uintptr_t)properties; - param.num_properties = p / 2; - - stream_fd = intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, &param); - return stream_fd; + return intel_perf_stream_open(device->physical->perf, device->fd, + device->context_id, metric_id, + period_exponent, true, true); } /* VK_INTEL_performance_query */