intel/ds: allow user to select metric set at start time
Rather than using always the same metric set, let the user choose when starting the producer with : INTEL_PERFETTO_METRIC_SET=RasterizerAndPixelBackend ./build/src/tool/pps/pps-producer Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Rohan Garg <rohan.garg@intel.com> Acked-by: Antonio Caggiano <antonio.caggiano@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13996>
This commit is contained in:
parent
69df00b33b
commit
6eb554a9c7
|
@ -154,6 +154,13 @@ Another option to enable access wide data without root permissions would be runn
|
|||
|
||||
Alternatively using the ``CAP_PERFMON`` permission on the binary should work too.
|
||||
|
||||
A particular metric set can also be selected to capture a different
|
||||
set of HW counters :
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
INTEL_PERFETTO_METRIC_SET=RasterizerAndPixelBackend ./build/src/tool/pps/pps-producer
|
||||
|
||||
Panfrost
|
||||
^^^^^^^^
|
||||
|
||||
|
|
|
@ -58,38 +58,17 @@ IntelDriver::~IntelDriver()
|
|||
void IntelDriver::enable_counter(uint32_t counter_id)
|
||||
{
|
||||
auto &counter = counters[counter_id];
|
||||
auto &group = groups[counter.group];
|
||||
if (perf->query) {
|
||||
if (perf->query->symbol_name != group.name) {
|
||||
PPS_LOG_ERROR(
|
||||
"Unable to enable metrics from different sets: %u "
|
||||
"belongs to %s but %s is currently in use.",
|
||||
counter_id,
|
||||
perf->query->symbol_name,
|
||||
group.name.c_str());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
enabled_counters.emplace_back(counter);
|
||||
if (!perf->query) {
|
||||
perf->query = perf->find_query_by_name(group.name);
|
||||
}
|
||||
}
|
||||
|
||||
void IntelDriver::enable_all_counters()
|
||||
{
|
||||
// We can only enable one metric set at a time so at least enable one.
|
||||
for (auto &group : groups) {
|
||||
if (group.name == "RenderBasic") {
|
||||
for (uint32_t counter_id : group.counters) {
|
||||
auto &counter = counters[counter_id];
|
||||
enabled_counters.emplace_back(counter);
|
||||
}
|
||||
|
||||
perf->query = perf->find_query_by_name(group.name);
|
||||
break;
|
||||
}
|
||||
// We should only have one group
|
||||
assert(groups.size() == 1);
|
||||
for (uint32_t counter_id : groups[0].counters) {
|
||||
auto &counter = counters[counter_id];
|
||||
enabled_counters.emplace_back(counter);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -99,49 +78,76 @@ bool IntelDriver::init_perfcnt()
|
|||
|
||||
perf = std::make_unique<IntelPerf>(drm_device.fd);
|
||||
|
||||
const char *metric_set_name = getenv("INTEL_PERFETTO_METRIC_SET");
|
||||
|
||||
struct intel_perf_query_info *default_query = nullptr;
|
||||
selected_query = nullptr;
|
||||
for (auto &query : perf->get_queries()) {
|
||||
// Create group
|
||||
CounterGroup group = {};
|
||||
group.id = groups.size();
|
||||
group.name = query->symbol_name;
|
||||
|
||||
for (int i = 0; i < query->n_counters; ++i) {
|
||||
intel_perf_query_counter &counter = query->counters[i];
|
||||
|
||||
// Create counter
|
||||
Counter counter_desc = {};
|
||||
counter_desc.id = counters.size();
|
||||
counter_desc.name = counter.symbol_name;
|
||||
counter_desc.group = group.id;
|
||||
counter_desc.getter = [counter, query, this](
|
||||
const Counter &c, const Driver &dri) -> Counter::Value {
|
||||
switch (counter.data_type) {
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
|
||||
return (int64_t)counter.oa_counter_read_uint64(perf->cfg, query, &perf->result);
|
||||
break;
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
|
||||
return counter.oa_counter_read_float(perf->cfg, query, &perf->result);
|
||||
break;
|
||||
}
|
||||
|
||||
return {};
|
||||
};
|
||||
|
||||
// Add counter id to the group
|
||||
group.counters.emplace_back(counter_desc.id);
|
||||
|
||||
// Store counter
|
||||
counters.emplace_back(std::move(counter_desc));
|
||||
}
|
||||
|
||||
// Store group
|
||||
groups.emplace_back(std::move(group));
|
||||
if (!strcmp(query->symbol_name, "RenderBasic"))
|
||||
default_query = query;
|
||||
if (metric_set_name && !strcmp(query->symbol_name, metric_set_name))
|
||||
selected_query = query;
|
||||
}
|
||||
|
||||
assert(groups.size() && "Failed to query groups");
|
||||
assert(default_query);
|
||||
|
||||
if (!selected_query) {
|
||||
if (metric_set_name) {
|
||||
PPS_LOG_ERROR("Available metric sets:");
|
||||
for (auto &query : perf->get_queries())
|
||||
PPS_LOG_ERROR(" %s", query->symbol_name);
|
||||
PPS_LOG_FATAL("Metric set '%s' not available.", metric_set_name);
|
||||
}
|
||||
selected_query = default_query;
|
||||
}
|
||||
|
||||
PPS_LOG("Using metric set '%s': %s",
|
||||
selected_query->symbol_name, selected_query->name);
|
||||
|
||||
// Create group
|
||||
CounterGroup group = {};
|
||||
group.id = groups.size();
|
||||
group.name = selected_query->symbol_name;
|
||||
|
||||
for (int i = 0; i < selected_query->n_counters; ++i) {
|
||||
intel_perf_query_counter &counter = selected_query->counters[i];
|
||||
|
||||
// Create counter
|
||||
Counter counter_desc = {};
|
||||
counter_desc.id = counters.size();
|
||||
counter_desc.name = counter.symbol_name;
|
||||
counter_desc.group = group.id;
|
||||
counter_desc.getter = [counter, this](
|
||||
const Counter &c, const Driver &dri) -> Counter::Value {
|
||||
switch (counter.data_type) {
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
|
||||
return (int64_t)counter.oa_counter_read_uint64(perf->cfg,
|
||||
selected_query,
|
||||
&perf->result);
|
||||
break;
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
|
||||
case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
|
||||
return counter.oa_counter_read_float(perf->cfg,
|
||||
selected_query,
|
||||
&perf->result);
|
||||
break;
|
||||
}
|
||||
|
||||
return {};
|
||||
};
|
||||
|
||||
// Add counter id to the group
|
||||
group.counters.emplace_back(counter_desc.id);
|
||||
|
||||
// Store counter
|
||||
counters.emplace_back(std::move(counter_desc));
|
||||
}
|
||||
|
||||
// Store group
|
||||
groups.emplace_back(std::move(group));
|
||||
|
||||
assert(counters.size() && "Failed to query counters");
|
||||
|
||||
// Clear accumulations
|
||||
|
@ -154,7 +160,7 @@ void IntelDriver::enable_perfcnt(uint64_t sampling_period_ns)
|
|||
{
|
||||
this->sampling_period_ns = sampling_period_ns;
|
||||
|
||||
if (!perf->open(sampling_period_ns)) {
|
||||
if (!perf->open(sampling_period_ns, selected_query)) {
|
||||
PPS_LOG_FATAL("Failed to open intel perf");
|
||||
}
|
||||
}
|
||||
|
@ -197,7 +203,7 @@ std::vector<PerfRecord> IntelDriver::parse_perf_records(const std::vector<uint8_
|
|||
// Report is next to the header
|
||||
const uint32_t *report = reinterpret_cast<const uint32_t *>(header + 1);
|
||||
uint64_t gpu_timestamp_ldw =
|
||||
intel_perf_report_timestamp(&perf->query.value(), report);
|
||||
intel_perf_report_timestamp(selected_query, report);
|
||||
|
||||
/* Our HW only provides us with the lower 32 bits of the 36bits
|
||||
* timestamp counter value. If we haven't captured the top bits yet,
|
||||
|
@ -292,11 +298,11 @@ uint64_t IntelDriver::gpu_next()
|
|||
auto record_b = reinterpret_cast<const drm_i915_perf_record_header *>(records[1].data.data());
|
||||
|
||||
intel_perf_query_result_accumulate_fields(&perf->result,
|
||||
&perf->query.value(),
|
||||
&perf->devinfo,
|
||||
record_a + 1,
|
||||
record_b + 1,
|
||||
false /* no_oa_accumulate */);
|
||||
selected_query,
|
||||
&perf->devinfo,
|
||||
record_a + 1,
|
||||
record_b + 1,
|
||||
false /* no_oa_accumulate */);
|
||||
|
||||
// Get last timestamp
|
||||
auto gpu_timestamp = records[1].timestamp;
|
||||
|
|
|
@ -9,6 +9,10 @@
|
|||
|
||||
#include <pps/pps_driver.h>
|
||||
|
||||
extern "C" {
|
||||
struct intel_perf_query_info;
|
||||
};
|
||||
|
||||
namespace pps
|
||||
{
|
||||
|
||||
|
@ -82,6 +86,9 @@ class IntelDriver : public Driver
|
|||
|
||||
// Gpu clock ID used to correlate GPU/CPU timestamps
|
||||
uint32_t clock_id = 0;
|
||||
|
||||
// Selected query
|
||||
intel_perf_query_info *selected_query = nullptr;
|
||||
};
|
||||
|
||||
} // namespace pps
|
||||
|
|
|
@ -36,13 +36,6 @@ IntelPerf::IntelPerf(const int drm_fd)
|
|||
false, // no pipeline statistics
|
||||
false // no register snapshots
|
||||
);
|
||||
|
||||
// Enable RenderBasic counters
|
||||
auto query_name = "RenderBasic";
|
||||
query = find_query_by_name(query_name);
|
||||
if (!query) {
|
||||
PPS_LOG_FATAL("Failed to find %s query", query_name);
|
||||
}
|
||||
}
|
||||
|
||||
IntelPerf::~IntelPerf()
|
||||
|
@ -58,20 +51,6 @@ IntelPerf::~IntelPerf()
|
|||
}
|
||||
}
|
||||
|
||||
/// @return A query info, which is something like a group of counters
|
||||
std::optional<struct intel_perf_query_info> IntelPerf::find_query_by_name(
|
||||
const std::string &name) const
|
||||
{
|
||||
for (int i = 0; i < cfg->n_queries; ++i) {
|
||||
struct intel_perf_query_info query = cfg->queries[i];
|
||||
if (name == query.symbol_name) {
|
||||
return query;
|
||||
}
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::vector<struct intel_perf_query_info *> IntelPerf::get_queries() const
|
||||
{
|
||||
assert(cfg && "Intel perf config should be valid");
|
||||
|
@ -98,7 +77,8 @@ static uint32_t get_oa_exponent(const intel_device_info *devinfo, const uint64_t
|
|||
return static_cast<uint32_t>(log2(sampling_period_ns * devinfo->timestamp_frequency / 1000000000ull)) - 1;
|
||||
}
|
||||
|
||||
bool IntelPerf::open(const uint64_t sampling_period_ns)
|
||||
bool IntelPerf::open(const uint64_t sampling_period_ns,
|
||||
struct intel_perf_query_info *query)
|
||||
{
|
||||
assert(!ctx && "Perf context should not be initialized at this point");
|
||||
|
||||
|
|
|
@ -23,11 +23,9 @@ class IntelPerf
|
|||
IntelPerf(int drm_fd);
|
||||
~IntelPerf();
|
||||
|
||||
std::optional<struct intel_perf_query_info> find_query_by_name(const std::string &name) const;
|
||||
|
||||
std::vector<struct intel_perf_query_info*> get_queries() const;
|
||||
|
||||
bool open(uint64_t sampling_period_ns);
|
||||
bool open(uint64_t sampling_period_ns, struct intel_perf_query_info *query);
|
||||
void close();
|
||||
|
||||
bool oa_stream_ready() const;
|
||||
|
@ -45,8 +43,6 @@ class IntelPerf
|
|||
struct intel_perf_query_result result = {};
|
||||
|
||||
struct intel_device_info devinfo = {};
|
||||
|
||||
std::optional<struct intel_perf_query_info> query = std::nullopt;
|
||||
};
|
||||
|
||||
} // namespace pps
|
||||
|
|
Loading…
Reference in New Issue