intel/ds: allow user to select metric set at start time

Rather than always using the same metric set, let the user choose one
when starting the producer with:

  INTEL_PERFETTO_METRIC_SET=RasterizerAndPixelBackend ./build/src/tool/pps/pps-producer

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Rohan Garg <rohan.garg@intel.com>
Acked-by: Antonio Caggiano <antonio.caggiano@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13996>
This commit is contained in:
Lionel Landwerlin 2021-11-22 16:24:43 +02:00 committed by Marge Bot
parent 69df00b33b
commit 6eb554a9c7
5 changed files with 96 additions and 100 deletions

View File

@ -154,6 +154,13 @@ Another option to enable access wide data without root permissions would be runn
Alternatively using the ``CAP_PERFMON`` permission on the binary should work too.
A particular metric set can also be selected to capture a different
set of HW counters:
.. code-block:: console

   INTEL_PERFETTO_METRIC_SET=RasterizerAndPixelBackend ./build/src/tool/pps/pps-producer

Panfrost
^^^^^^^^

View File

@ -58,38 +58,17 @@ IntelDriver::~IntelDriver()
/// Enable a single counter for sampling.
///
/// Only one metric set can be in use at a time: once a query has been
/// chosen, a counter whose group name differs from that query's symbol name
/// is rejected with an error log and is not enabled. When no query has been
/// chosen yet, the counter's group selects it via find_query_by_name().
///
/// @param counter_id Index into `counters` of the counter to enable.
void IntelDriver::enable_counter(uint32_t counter_id)
{
   auto &counter = counters[counter_id];
   auto &group = groups[counter.group];

   if (perf->query && perf->query->symbol_name != group.name) {
      // Arguments follow the order of the message: first the set the
      // counter belongs to, then the set that is currently in use.
      PPS_LOG_ERROR(
         "Unable to enable metrics from different sets: %u "
         "belongs to %s but %s is currently in use.",
         counter_id,
         group.name.c_str(),
         perf->query->symbol_name);
      return;
   }

   enabled_counters.emplace_back(counter);

   // First counter enabled: its group decides which query gets used.
   if (!perf->query) {
      perf->query = perf->find_query_by_name(group.name);
   }
}
/// Enable counters in bulk by appending each counter of a group to
/// `enabled_counters`.
// NOTE(review): this span appears to interleave two revisions of the body
// (a search for the "RenderBasic" group that also sets perf->query, and a
// loop over groups[0] guarded by an assert that exactly one group exists),
// and the braces do not balance as shown. Confirm against the actual file
// before relying on either variant.
void IntelDriver::enable_all_counters()
{
// We can only enable one metric set at a time so at least enable one.
for (auto &group : groups) {
if (group.name == "RenderBasic") {
for (uint32_t counter_id : group.counters) {
auto &counter = counters[counter_id];
enabled_counters.emplace_back(counter);
}
perf->query = perf->find_query_by_name(group.name);
break;
}
// We should only have one group
assert(groups.size() == 1);
// Enable every counter listed in that single group.
for (uint32_t counter_id : groups[0].counters) {
auto &counter = counters[counter_id];
enabled_counters.emplace_back(counter);
}
}
@ -99,49 +78,76 @@ bool IntelDriver::init_perfcnt()
perf = std::make_unique<IntelPerf>(drm_device.fd);
const char *metric_set_name = getenv("INTEL_PERFETTO_METRIC_SET");
struct intel_perf_query_info *default_query = nullptr;
selected_query = nullptr;
for (auto &query : perf->get_queries()) {
// Create group
CounterGroup group = {};
group.id = groups.size();
group.name = query->symbol_name;
for (int i = 0; i < query->n_counters; ++i) {
intel_perf_query_counter &counter = query->counters[i];
// Create counter
Counter counter_desc = {};
counter_desc.id = counters.size();
counter_desc.name = counter.symbol_name;
counter_desc.group = group.id;
counter_desc.getter = [counter, query, this](
const Counter &c, const Driver &dri) -> Counter::Value {
switch (counter.data_type) {
case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
return (int64_t)counter.oa_counter_read_uint64(perf->cfg, query, &perf->result);
break;
case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
return counter.oa_counter_read_float(perf->cfg, query, &perf->result);
break;
}
return {};
};
// Add counter id to the group
group.counters.emplace_back(counter_desc.id);
// Store counter
counters.emplace_back(std::move(counter_desc));
}
// Store group
groups.emplace_back(std::move(group));
if (!strcmp(query->symbol_name, "RenderBasic"))
default_query = query;
if (metric_set_name && !strcmp(query->symbol_name, metric_set_name))
selected_query = query;
}
assert(groups.size() && "Failed to query groups");
assert(default_query);
if (!selected_query) {
if (metric_set_name) {
PPS_LOG_ERROR("Available metric sets:");
for (auto &query : perf->get_queries())
PPS_LOG_ERROR(" %s", query->symbol_name);
PPS_LOG_FATAL("Metric set '%s' not available.", metric_set_name);
}
selected_query = default_query;
}
PPS_LOG("Using metric set '%s': %s",
selected_query->symbol_name, selected_query->name);
// Create group
CounterGroup group = {};
group.id = groups.size();
group.name = selected_query->symbol_name;
for (int i = 0; i < selected_query->n_counters; ++i) {
intel_perf_query_counter &counter = selected_query->counters[i];
// Create counter
Counter counter_desc = {};
counter_desc.id = counters.size();
counter_desc.name = counter.symbol_name;
counter_desc.group = group.id;
counter_desc.getter = [counter, this](
const Counter &c, const Driver &dri) -> Counter::Value {
switch (counter.data_type) {
case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
return (int64_t)counter.oa_counter_read_uint64(perf->cfg,
selected_query,
&perf->result);
break;
case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
return counter.oa_counter_read_float(perf->cfg,
selected_query,
&perf->result);
break;
}
return {};
};
// Add counter id to the group
group.counters.emplace_back(counter_desc.id);
// Store counter
counters.emplace_back(std::move(counter_desc));
}
// Store group
groups.emplace_back(std::move(group));
assert(counters.size() && "Failed to query counters");
// Clear accumulations
@ -154,7 +160,7 @@ void IntelDriver::enable_perfcnt(uint64_t sampling_period_ns)
{
this->sampling_period_ns = sampling_period_ns;
if (!perf->open(sampling_period_ns)) {
if (!perf->open(sampling_period_ns, selected_query)) {
PPS_LOG_FATAL("Failed to open intel perf");
}
}
@ -197,7 +203,7 @@ std::vector<PerfRecord> IntelDriver::parse_perf_records(const std::vector<uint8_
// Report is next to the header
const uint32_t *report = reinterpret_cast<const uint32_t *>(header + 1);
uint64_t gpu_timestamp_ldw =
intel_perf_report_timestamp(&perf->query.value(), report);
intel_perf_report_timestamp(selected_query, report);
/* Our HW only provides us with the lower 32 bits of the 36bits
* timestamp counter value. If we haven't captured the top bits yet,
@ -292,11 +298,11 @@ uint64_t IntelDriver::gpu_next()
auto record_b = reinterpret_cast<const drm_i915_perf_record_header *>(records[1].data.data());
intel_perf_query_result_accumulate_fields(&perf->result,
&perf->query.value(),
&perf->devinfo,
record_a + 1,
record_b + 1,
false /* no_oa_accumulate */);
selected_query,
&perf->devinfo,
record_a + 1,
record_b + 1,
false /* no_oa_accumulate */);
// Get last timestamp
auto gpu_timestamp = records[1].timestamp;

View File

@ -9,6 +9,10 @@
#include <pps/pps_driver.h>
extern "C" {
struct intel_perf_query_info;
};
namespace pps
{
@ -82,6 +86,9 @@ class IntelDriver : public Driver
// Gpu clock ID used to correlate GPU/CPU timestamps
uint32_t clock_id = 0;
// Selected query
intel_perf_query_info *selected_query = nullptr;
};
} // namespace pps

View File

@ -36,13 +36,6 @@ IntelPerf::IntelPerf(const int drm_fd)
false, // no pipeline statistics
false // no register snapshots
);
// Enable RenderBasic counters
auto query_name = "RenderBasic";
query = find_query_by_name(query_name);
if (!query) {
PPS_LOG_FATAL("Failed to find %s query", query_name);
}
}
IntelPerf::~IntelPerf()
@ -58,20 +51,6 @@ IntelPerf::~IntelPerf()
}
}
/// Look up a query (something like a group of counters) by its symbol name.
/// @param name Symbol name compared against each query's `symbol_name`.
/// @return A copy of the first matching query info, or std::nullopt when no
///         query in `cfg` has that symbol name.
std::optional<struct intel_perf_query_info> IntelPerf::find_query_by_name(
   const std::string &name) const
{
   assert(cfg && "Intel perf config should be valid");

   for (int i = 0; i < cfg->n_queries; ++i) {
      // Bind by reference so that only the matching entry is copied out,
      // instead of copying every query info while scanning.
      const struct intel_perf_query_info &query = cfg->queries[i];
      if (name == query.symbol_name) {
         return query;
      }
   }

   return std::nullopt;
}
std::vector<struct intel_perf_query_info *> IntelPerf::get_queries() const
{
assert(cfg && "Intel perf config should be valid");
@ -98,7 +77,8 @@ static uint32_t get_oa_exponent(const intel_device_info *devinfo, const uint64_t
return static_cast<uint32_t>(log2(sampling_period_ns * devinfo->timestamp_frequency / 1000000000ull)) - 1;
}
bool IntelPerf::open(const uint64_t sampling_period_ns)
bool IntelPerf::open(const uint64_t sampling_period_ns,
struct intel_perf_query_info *query)
{
assert(!ctx && "Perf context should not be initialized at this point");

View File

@ -23,11 +23,9 @@ class IntelPerf
IntelPerf(int drm_fd);
~IntelPerf();
std::optional<struct intel_perf_query_info> find_query_by_name(const std::string &name) const;
std::vector<struct intel_perf_query_info*> get_queries() const;
bool open(uint64_t sampling_period_ns);
bool open(uint64_t sampling_period_ns, struct intel_perf_query_info *query);
void close();
bool oa_stream_ready() const;
@ -45,8 +43,6 @@ class IntelPerf
struct intel_perf_query_result result = {};
struct intel_device_info devinfo = {};
std::optional<struct intel_perf_query_info> query = std::nullopt;
};
} // namespace pps