mesa/src/freedreno/ds/fd_pps_driver.cc

366 lines
9.0 KiB
C++

/*
* Copyright © 2021 Google, Inc.
*
* SPDX-License-Identifier: MIT
*/
#include "fd_pps_driver.h"
#include <cstring>
#include <iostream>
#include <perfetto.h>
#include "pps/pps.h"
#include "pps/pps_algorithm.h"
namespace pps
{
/* Smallest sampling period this driver advertises, in nanoseconds. */
uint64_t
FreedrenoDriver::get_min_sampling_period_ns()
{
   const uint64_t min_period_ns = 100000;

   return min_period_ns;
}
/*
TODO this seems like it would be largely the same for a5xx as well
(ie. same countable names)..
*/
/*
 * Register the countables and derived counters used on a6xx.
 *
 * Any previously registered groups, counters, countables and enabled
 * counters are dropped first and a single "counters" group is created.
 * Each countable() call registers a hw countable by name, and each
 * counter() call registers a derived counter whose lambda combines the
 * countable values with `time`, `max_freq` and `info`.
 *
 * NOTE(review): next_countable_id / next_counter_id are not reset here even
 * though the tables are cleared -- confirm this only runs once per driver
 * instance.
 */
void
FreedrenoDriver::setup_a6xx_counters()
{
   /* TODO is there a reason to want more than one group? */
   CounterGroup group = {};
   group.name = "counters";

   groups.clear();
   counters.clear();
   countables.clear();
   enabled_counters.clear();
   groups.emplace_back(std::move(group));

   /*
    * Create the countables that we'll be using.
    */

   auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT");
   auto PERF_CP_BUSY_CYCLES = countable("PERF_CP_BUSY_CYCLES");
   auto PERF_RB_3D_PIXELS = countable("PERF_RB_3D_PIXELS");
   auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS");
   auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS");
   auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES");
   auto PERF_SP_BUSY_CYCLES = countable("PERF_SP_BUSY_CYCLES");

   /*
    * And then setup the derived counters that we are exporting to
    * pps based on the captured countable values
    */

   counter("GPU Frequency", Counter::Units::Hertz, [=]() {
         return PERF_CP_ALWAYS_COUNT / time;
      }
   );

   counter("GPU % Utilization", Counter::Units::Percent, [=]() {
         return 100.0 * (PERF_CP_BUSY_CYCLES / time) / max_freq;
      }
   );

   // This one is a bit of a guess, but seems plausible..
   counter("ALU / Fragment", Counter::Units::None, [=]() -> int64_t {
         /* A sampling interval in which no pixels were counted would
          * otherwise be an integer division by zero, so report 0.
          */
         const int64_t pixels = PERF_RB_3D_PIXELS;
         if (pixels == 0)
            return 0;

         return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
                 PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / pixels;
      }
   );

   counter("TP L1 Cache Misses", Counter::Units::None, [=]() {
         return PERF_TP_L1_CACHELINE_MISSES / time;
      }
   );

   counter("Shader Core Utilization", Counter::Units::Percent, [=]() {
         return 100.0 * (PERF_SP_BUSY_CYCLES / time) / (max_freq * info.num_sp_cores);
      }
   );

   // TODO add more.. see https://gpuinspector.dev/docs/gpu-counters/qualcomm
   // for what blob exposes
}
/**
 * Generate and submit the cmdstream to configure the counter/countable
 * muxing.
 *
 * A fresh submit with a primary, growable ringbuffer is created, every
 * registered countable emits its register writes into it, and the submit
 * is flushed.  The call always blocks until the submit fence is ready
 * before releasing the ring and submit.
 *
 * @param reset  forwarded to Countable::configure(); when true the
 *               counters are also disabled/cleared/re-enabled.
 * @param wait   when true, additionally wait on the pipe for the fence
 *               before returning.
 */
void
FreedrenoDriver::configure_counters(bool reset, bool wait)
{
   struct fd_submit *submit = fd_submit_new(pipe);
   enum fd_ringbuffer_flags flags =
      (enum fd_ringbuffer_flags)(FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000, flags);

   /* Iterate by reference: a by-value loop variable would copy each
    * Countable (including its name string) every time this runs.
    */
   for (auto &countable : countables)
      countable.configure(ring, reset);

   struct fd_submit_fence fence = {};
   util_queue_fence_init(&fence.ready);

   fd_submit_flush(submit, -1, &fence);

   util_queue_fence_wait(&fence.ready);

   fd_ringbuffer_del(ring);
   fd_submit_del(submit);

   if (wait)
      fd_pipe_wait(pipe, &fence.fence);
}
/**
 * Read the current counter values and record the time.
 *
 * The boot-time timestamp is stored in last_dump_ts before the countables
 * are sampled.
 */
void
FreedrenoDriver::collect_countables()
{
   last_dump_ts = perfetto::base::GetBootTimeNs().count();

   /* Runs on every sample, so iterate by reference instead of copying
    * each Countable.
    */
   for (auto &countable : countables)
      countable.collect();
}
/*
 * Open the device, query GPU id and max frequency, look up the perfcntr
 * tables for this GPU, register and resolve the countables, map the GPU
 * I/O space, then program the counters and take an initial sample.
 *
 * Every failure is reported through PERFETTO_FATAL; the `return false`
 * after each one keeps the function well-formed should that macro return.
 */
bool
FreedrenoDriver::init_perfcnt()
{
   uint64_t val;

   dev = fd_device_new(drm_device.fd);
   if (!dev) {
      PERFETTO_FATAL("Could not create freedreno device");
      return false;
   }

   pipe = fd_pipe_new(dev, FD_PIPE_3D);
   if (!pipe) {
      PERFETTO_FATAL("Could not create 3D pipe");
      return false;
   }

   if (fd_pipe_get_param(pipe, FD_GPU_ID, &val)) {
      PERFETTO_FATAL("Could not get GPU_ID");
      return false;
   }
   gpu_id = val;

   if (fd_pipe_get_param(pipe, FD_MAX_FREQ, &val)) {
      PERFETTO_FATAL("Could not get MAX_FREQ");
      return false;
   }
   max_freq = val;

   perfcntrs = fd_perfcntrs(gpu_id, &num_perfcntrs);
   if (num_perfcntrs == 0) {
      PERFETTO_FATAL("No hw counters available");
      return false;
   }

   /* One "next free counter" index per perfcntr group, all starting at 0. */
   assigned_counters.assign(num_perfcntrs, 0);

   switch (gpu_id) {
   case 600 ... 699:
      setup_a6xx_counters();
      break;
   default:
      PERFETTO_FATAL("Unsupported GPU: a%03u", gpu_id);
      return false;
   }

   /* Per-countable state is indexed by countable id. */
   state.resize(next_countable_id);

   for (auto &countable : countables)
      countable.resolve();

   freedreno_dev_info_init(&info, gpu_id);

   io = fd_dt_find_io();
   if (!io) {
      PERFETTO_FATAL("Could not map GPU I/O space");
      return false;
   }

   /* Program the muxing with a full reset, then take a baseline sample. */
   configure_counters(true, true);
   collect_countables();

   return true;
}
/* Append the counter stored at index counter_id to the enabled list. */
void
FreedrenoDriver::enable_counter(const uint32_t counter_id)
{
   auto &selected = counters[counter_id];

   enabled_counters.push_back(selected);
}
/* Append every registered counter, in order, to the enabled list. */
void
FreedrenoDriver::enable_all_counters()
{
   enabled_counters.reserve(counters.size());
   enabled_counters.insert(enabled_counters.end(),
                           counters.begin(), counters.end());
}
/* Intentionally a no-op: the requested sampling period is ignored. */
void
FreedrenoDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */)
{
}
bool
FreedrenoDriver::dump_perfcnt()
{
auto last_ts = last_dump_ts;
collect_countables();
auto elapsed_time_ns = last_dump_ts - last_ts;
time = (float)elapsed_time_ns / 1000000000.0;
// TODO we want to do this periodically to keep the GPU awake
// (and to ensure we don't loose counter configuration due to
// suspend/resume cycle), but we don't' need to do this every
// time.. we probably just want to do this every 30-60ms..
configure_counters(false, false);
last_capture_ts = last_dump_ts;
return true;
}
/*
 * Hand out the timestamp of the most recent dump exactly once: the stored
 * value is cleared, so a second call without a new dump returns 0.
 */
uint64_t FreedrenoDriver::next()
{
   const auto pending_ts = last_capture_ts;

   last_capture_ts = 0;

   return pending_ts;
}
/* Intentionally a no-op; see the comment in the body. */
void FreedrenoDriver::disable_perfcnt()
{
/* There isn't really any disable, only reconfiguring which countables
* get muxed to which counters
*/
}
/*
* Countable
*/
/*
 * Create a Countable for the named hw countable, store a copy of it in
 * `countables`, and return it to the caller by value.
 */
FreedrenoDriver::Countable
FreedrenoDriver::countable(std::string name)
{
   Countable created(this, name);

   countables.emplace_back(created);

   return created;
}
/*
 * Bind a countable name to its owning driver and allocate the next
 * countable id from that driver.
 *
 * `name` is a by-value sink parameter, so it is moved into the member
 * rather than copied a second time.
 */
FreedrenoDriver::Countable::Countable(FreedrenoDriver *d, std::string name)
   : id {d->next_countable_id++}, d {d}, name {std::move(name)}
{
}
/*
 * Emit register writes on ring to configure counter/countable muxing.
 *
 * After a wait-for-idle the countable's selector is written to the
 * counter's select register.  When reset is true the counter is also
 * disabled before, and re-enabled after, the selector write, and its clear
 * register is pulsed (1 then 0).  Each of those steps is skipped if the
 * corresponding register field of the counter is zero.
 */
void
FreedrenoDriver::Countable::configure(struct fd_ringbuffer *ring, bool reset)
{
   const struct fd_perfcntr_countable *sel = d->state[id].countable;
   const struct fd_perfcntr_counter *ctr = d->state[id].counter;

   const bool toggle_enable = reset && ctr->enable;
   const bool pulse_clear = reset && ctr->clear;

   OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);

   /* Stop the counter while it is being reprogrammed. */
   if (toggle_enable) {
      OUT_PKT4(ring, ctr->enable, 1);
      OUT_RING(ring, 0);
   }

   if (pulse_clear) {
      OUT_PKT4(ring, ctr->clear, 1);
      OUT_RING(ring, 1);

      OUT_PKT4(ring, ctr->clear, 1);
      OUT_RING(ring, 0);
   }

   /* Route the countable to this counter. */
   OUT_PKT4(ring, ctr->select_reg, 1);
   OUT_RING(ring, sel->selector);

   if (toggle_enable) {
      OUT_PKT4(ring, ctr->enable, 1);
      OUT_RING(ring, 1);
   }
}
/*
 * Collect the current counter value; get_value() later reports the delta
 * against the previous sample.
 *
 * The previous value is saved to last_value and the 64-bit counter is then
 * re-read from the mapped register space as two 32-bit halves.
 */
void
FreedrenoDriver::Countable::collect()
{
   auto &sample = d->state[id];
   const struct fd_perfcntr_counter *counter = sample.counter;

   sample.last_value = sample.value;

   /* Register offsets are in dwords from the start of the mapping.  The
    * accesses are volatile because these are hardware registers whose
    * contents change behind the compiler's back.
    */
   const volatile uint32_t *regs = (const volatile uint32_t *)d->io;
   const volatile uint32_t *reg_lo = regs + counter->counter_reg_lo;
   const volatile uint32_t *reg_hi = regs + counter->counter_reg_hi;

   /* The two halves cannot be read atomically: if the low word wraps
    * between the reads, lo and hi belong to different values.  Re-read
    * until the high word is the same before and after reading lo.
    */
   uint32_t hi = *reg_hi;
   uint32_t lo;
   uint32_t hi_before;
   do {
      hi_before = hi;
      lo = *reg_lo;
      hi = *reg_hi;
   } while (hi != hi_before);

   sample.value = lo | ((uint64_t)hi << 32);
}
/* Resolve the countable and assign next counter from it's group: */
void
FreedrenoDriver::Countable::resolve()
{
for (unsigned i = 0; i < d->num_perfcntrs; i++) {
const struct fd_perfcntr_group *g = &d->perfcntrs[i];
for (unsigned j = 0; j < g->num_countables; j++) {
const struct fd_perfcntr_countable *c = &g->countables[j];
if (name == c->name) {
d->state[id].countable = c;
/* Assign a counter from the same group: */
assert(d->assigned_counters[i] < g->num_counters);
d->state[id].counter = &g->counters[d->assigned_counters[i]++];
std::cout << "Countable: " << name << ", group=" << g->name <<
", counter=" << d->assigned_counters[i] - 1 << "\n";
return;
}
}
}
unreachable("no such countable!");
}
/* Difference between the two most recent samples of this countable. */
uint64_t
FreedrenoDriver::Countable::get_value() const
{
   const auto &sample = d->state[id];

   return sample.value - sample.last_value;
}
/*
* DerivedCounter
*/
/*
 * Construct a counter whose value is produced by the `derive` callback.
 *
 * The counter takes the next counter id from the driver, logs its name and
 * id to stdout, records its units, and installs a getter that ignores its
 * arguments and simply returns derive().
 */
FreedrenoDriver::DerivedCounter::DerivedCounter(FreedrenoDriver *d, std::string name,
                                                Counter::Units units,
                                                std::function<int64_t()> derive)
   : Counter(d->next_counter_id++, name, 0)
{
   std::cout << "DerivedCounter: " << name << ", id=" << id << "\n";

   this->units = units;

   auto getter = [derive](const Counter &, const Driver &) {
      return derive();
   };
   set_getter(getter);
}
/*
 * Create a DerivedCounter with the given name, units and derive callback,
 * store a copy of it in `counters`, and return it to the caller by value.
 */
FreedrenoDriver::DerivedCounter
FreedrenoDriver::counter(std::string name, Counter::Units units,
                         std::function<int64_t()> derive)
{
   DerivedCounter derived(this, name, units, derive);

   counters.emplace_back(derived);

   return derived;
}
} // namespace pps