intel/perf: Add function to open perf stream

This will make it easy to add Xe KMD support and reduce code duplication.
No changes in behavior are expected here.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29077>
This commit is contained in:
José Roberto de Souza 2024-04-30 12:30:44 -07:00 committed by Marge Bot
parent d27dcb815e
commit 73188a4590
7 changed files with 102 additions and 132 deletions

View File

@ -5,6 +5,7 @@
#include "perf/i915/intel_perf.h"
#include "common/intel_gem.h"
#include "perf/intel_perf.h"
#include "drm-uapi/i915_drm.h"
@ -18,3 +19,63 @@ uint64_t i915_perf_get_oa_format(struct intel_perf_config *perf)
else
return I915_OA_FORMAT_A24u40_A14u32_B8_C8;
}
/**
 * Open an i915 perf OA stream on drm_fd.
 *
 * Builds the (key, value) property list consumed by DRM_IOCTL_I915_PERF_OPEN
 * and issues the ioctl.
 *
 * @param perf_config      perf configuration; supplies devinfo and the SSEU
 *                         description used for DRM_I915_PERF_PROP_GLOBAL_SSEU
 * @param drm_fd           DRM device file descriptor the ioctl is issued on
 * @param ctx_id           context handle to restrict sampling to, or
 *                         INTEL_PERF_INVALID_CTX_ID to omit the property
 * @param metrics_set_id   value passed as DRM_I915_PERF_PROP_OA_METRICS_SET
 * @param report_format    value passed as DRM_I915_PERF_PROP_OA_FORMAT
 * @param period_exponent  value passed as DRM_I915_PERF_PROP_OA_EXPONENT
 * @param hold_preemption  when true, add DRM_I915_PERF_PROP_HOLD_PREEMPTION
 * @param enable           when false, open with I915_PERF_FLAG_DISABLED
 *
 * @return the result of intel_ioctl(): the stream file descriptor on success.
 *         The result is passed through unmodified so that a failed open is
 *         visible to callers as the ioctl's own error value (intel_perf_open()
 *         tests for -1) rather than being folded into 0, which is a valid fd.
 */
int
i915_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
                      uint32_t ctx_id, uint64_t metrics_set_id,
                      uint64_t report_format, uint64_t period_exponent,
                      bool hold_preemption, bool enable)
{
   uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
   uint32_t p = 0;

   /* Single context sampling if valid context id. */
   if (ctx_id != INTEL_PERF_INVALID_CTX_ID) {
      properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
      properties[p++] = ctx_id;
   }

   /* Include OA reports in samples */
   properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
   properties[p++] = true;

   /* OA unit configuration */
   properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
   properties[p++] = metrics_set_id;

   properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
   properties[p++] = report_format;

   properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
   properties[p++] = period_exponent;

   if (hold_preemption) {
      properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;
      properties[p++] = true;
   }

   /* If global SSEU is available, pin it to the default. This will ensure on
    * Gfx11 for instance we use the full EU array. Initially when perf was
    * enabled we would use only half on Gfx11 because of functional
    * requirements.
    *
    * Not supported on Gfx12.5+.
    */
   if (intel_perf_has_global_sseu(perf_config) &&
       perf_config->devinfo->verx10 < 125) {
      properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
      properties[p++] = (uintptr_t) &perf_config->sseu;
   }

   assert(p <= ARRAY_SIZE(properties));

   /* The property array holds key/value pairs, hence p / 2 entries. */
   struct drm_i915_perf_open_param param = {
      .flags = I915_PERF_FLAG_FD_CLOEXEC |
               I915_PERF_FLAG_FD_NONBLOCK |
               (enable ? 0 : I915_PERF_FLAG_DISABLED),
      .num_properties = p / 2,
      .properties_ptr = (uintptr_t) properties,
   };

   /* Do not remap a failure to 0: callers compare against -1, and 0 is a
    * legitimate file descriptor.
    */
   return intel_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
}

View File

@ -5,8 +5,14 @@
#pragma once
#include <stdbool.h>
#include <stdint.h>
struct intel_perf_config;
uint64_t i915_perf_get_oa_format(struct intel_perf_config *perf);
/**
 * Open an i915 perf OA stream on drm_fd.
 *
 * ctx_id selects single-context sampling unless it is
 * INTEL_PERF_INVALID_CTX_ID. metrics_set_id, report_format and
 * period_exponent are forwarded as the OA metrics set, OA report format and
 * OA exponent properties. hold_preemption requests the hold-preemption
 * property, and enable == false opens the stream in the disabled state.
 *
 * Returns the file descriptor of the opened stream on success.
 */
int i915_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
uint32_t ctx_id, uint64_t metrics_set_id,
uint64_t report_format, uint64_t period_exponent,
bool hold_preemption, bool enable);

View File

@ -1580,3 +1580,22 @@ intel_perf_get_oa_format(struct intel_perf_config *perf_cfg)
return 0;
}
}
/**
 * Open a perf stream using the backend that matches the device's KMD type.
 *
 * The OA report format is looked up with intel_perf_get_oa_format() and the
 * remaining arguments are forwarded unchanged to the backend.
 *
 * @param perf_config      perf configuration (provides devinfo->kmd_type)
 * @param drm_fd           DRM device file descriptor
 * @param ctx_id           context to sample, or INTEL_PERF_INVALID_CTX_ID
 * @param metrics_set_id   metrics set to program
 * @param period_exponent  OA sampling period exponent
 * @param hold_preemption  whether to ask the backend to hold preemption
 * @param enable           whether the stream starts enabled
 *
 * @return the backend's result: the stream file descriptor on success.
 *         Only INTEL_KMD_TYPE_I915 is handled; any other KMD type is treated
 *         as unreachable.
 */
int
intel_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
                       uint32_t ctx_id, uint64_t metrics_set_id,
                       uint64_t period_exponent, bool hold_preemption,
                       bool enable)
{
   uint64_t report_format = intel_perf_get_oa_format(perf_config);

   switch (perf_config->devinfo->kmd_type) {
   case INTEL_KMD_TYPE_I915:
      return i915_perf_stream_open(perf_config, drm_fd, ctx_id, metrics_set_id,
                                   report_format, period_exponent,
                                   hold_preemption, enable);
   default:
      unreachable("missing");
      /* Fall back to -1, the failure value intel_perf_open() tests for;
       * 0 would be indistinguishable from a valid file descriptor.
       */
      return -1;
   }
}

View File

@ -565,6 +565,11 @@ void intel_perf_get_counters_passes(struct intel_perf_config *perf,
uint32_t counter_indices_count,
struct intel_perf_counter_pass *counter_pass);
/**
 * Open a perf stream on drm_fd, dispatching on the device's KMD type.
 *
 * ctx_id is the context to sample (INTEL_PERF_INVALID_CTX_ID for none),
 * metrics_set_id and period_exponent configure the OA unit, hold_preemption
 * requests that preemption be held, and enable selects whether the stream
 * starts enabled.
 *
 * Returns the file descriptor of the opened stream on success.
 */
int intel_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
uint32_t ctx_id, uint64_t metrics_set_id,
uint64_t period_exponent, bool hold_preemption,
bool enable);
#ifdef __cplusplus
} // extern "C"
#endif

View File

@ -357,53 +357,9 @@ intel_perf_open(struct intel_perf_context *perf_ctx,
uint32_t ctx_id,
bool enable)
{
uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
uint32_t p = 0;
/* Single context sampling if valid context id. */
if (ctx_id != INTEL_PERF_INVALID_CTX_ID) {
properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
properties[p++] = ctx_id;
}
/* Include OA reports in samples */
properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
properties[p++] = true;
/* OA unit configuration */
properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
properties[p++] = metrics_set_id;
properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
properties[p++] = report_format;
properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
properties[p++] = period_exponent;
/* If global SSEU is available, pin it to the default. This will ensure on
* Gfx11 for instance we use the full EU array. Initially when perf was
* enabled we would use only half on Gfx11 because of functional
* requirements.
*
* Temporarily disable this option on Gfx12.5+, kernel doesn't appear to
* support it.
*/
if (intel_perf_has_global_sseu(perf_ctx->perf) &&
perf_ctx->devinfo->verx10 < 125) {
properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
properties[p++] = to_user_pointer(&perf_ctx->perf->sseu);
}
assert(p <= ARRAY_SIZE(properties));
struct drm_i915_perf_open_param param = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
(enable ? 0 : I915_PERF_FLAG_DISABLED),
.num_properties = p / 2,
.properties_ptr = (uintptr_t) properties,
};
int fd = intel_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
int fd = intel_perf_stream_open(perf_ctx->perf, drm_fd, ctx_id,
metrics_set_id, period_exponent, false,
enable);
if (fd == -1) {
DBG("Error opening gen perf OA stream: %m\n");
return false;

View File

@ -96,50 +96,11 @@ anv_device_perf_init(struct anv_device *device)
/*
 * Open the perf OA stream used by the device for metric set metric_id, on
 * device->fd and restricted to device->context_id, at the slowest sampling
 * period (exponent 31) with preemption held.
 *
 * NOTE(review): this hunk is rendered without +/- markers, so two
 * implementations appear back to back: the open-coded property list that
 * ends in `return stream_fd;` and the call to intel_perf_stream_open().
 * Going by the hunk header (50 lines -> 11) the open-coded part is
 * presumably the removed code — confirm against the raw diff.
 */
static int
anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
{
uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
struct drm_i915_perf_open_param param;
int p = 0, stream_fd;
uint64_t period_exponent = 31; /* slowest sampling period */
/* Properties are (key, value) pairs appended two slots at a time. */
properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
properties[p++] = true;
properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
properties[p++] = metric_id;
properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
properties[p++] = intel_perf_get_oa_format(device->physical->perf);
properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
properties[p++] = 31; /* slowest sampling period */
properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
properties[p++] = device->context_id;
properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;
properties[p++] = true;
/* If global SSEU is available, pin it to the default. This will ensure on
* Gfx11 for instance we use the full EU array. Initially when perf was
* enabled we would use only half on Gfx11 because of functional
* requirements.
*
* Temporarily disable this option on Gfx12.5+, kernel doesn't appear to
* support it.
*/
if (intel_perf_has_global_sseu(device->physical->perf) &&
device->info->verx10 < 125) {
properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
properties[p++] = (uintptr_t) &device->physical->perf->sseu;
}
memset(&param, 0, sizeof(param));
param.flags = 0;
param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
param.properties_ptr = (uintptr_t)properties;
/* Two array slots per property. */
param.num_properties = p / 2;
stream_fd = intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, &param);
return stream_fd;
/* Shared helper: hold_preemption = true, enable = true. */
return intel_perf_stream_open(device->physical->perf, device->fd,
device->context_id, metric_id,
period_exponent, true, true);
}
/* VK_INTEL_performance_query */

View File

@ -104,49 +104,11 @@ anv_device_perf_init(struct anv_device *device)
/*
 * Open the perf OA stream used by the device for metric set metric_id, on
 * device->fd and restricted to device->context_id, at the slowest sampling
 * period (exponent 31) with preemption held.
 *
 * NOTE(review): this hunk is rendered without +/- markers, so two
 * implementations appear back to back: the open-coded property list that
 * ends in `return stream_fd;` and the call to intel_perf_stream_open().
 * Going by the hunk header (49 lines -> 11) the open-coded part is
 * presumably the removed code — confirm against the raw diff.
 */
static int
anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
{
uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
struct drm_i915_perf_open_param param;
int p = 0, stream_fd;
uint64_t period_exponent = 31; /* slowest sampling period */
/* Properties are (key, value) pairs appended two slots at a time. */
properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
properties[p++] = true;
properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
properties[p++] = metric_id;
properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
properties[p++] = intel_perf_get_oa_format(device->physical->perf);
properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
properties[p++] = 31; /* slowest sampling period */
properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
properties[p++] = device->context_id;
properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;
properties[p++] = true;
/* If global SSEU is available, pin it to the default. This will ensure on
* Gfx11 for instance we use the full EU array. Initially when perf was
* enabled we would use only half on Gfx11 because of functional
* requirements.
*
* Temporarily disable this option on Gfx12.5+, kernel doesn't appear to
* support it.
*/
/* NOTE(review): despite the comment above, this condition carries no
* verx10 check; only the availability of global SSEU is tested here.
*/
if (intel_perf_has_global_sseu(device->physical->perf)) {
properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
properties[p++] = (uintptr_t) &device->physical->perf->sseu;
}
memset(&param, 0, sizeof(param));
param.flags = 0;
param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
param.properties_ptr = (uintptr_t)properties;
/* Two array slots per property. */
param.num_properties = p / 2;
stream_fd = intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, &param);
return stream_fd;
/* Shared helper: hold_preemption = true, enable = true. */
return intel_perf_stream_open(device->physical->perf, device->fd,
device->context_id, metric_id,
period_exponent, true, true);
}
/* VK_INTEL_performance_query */