freedreno: Add freedreno pps driver
Signed-off-by: Rob Clark <robdclark@chromium.org> Reviewed-by: Antonio Caggiano <antonio.caggiano@collabora.com> Acked-by: Emma Anholt <emma@anholt.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9901>
This commit is contained in:
parent
ecfb00fb09
commit
3e13e45467
|
@ -475,6 +475,6 @@ option(
|
|||
'datasources',
|
||||
type : 'array',
|
||||
value : ['auto'],
|
||||
choices : ['auto', 'panfrost', 'intel'],
|
||||
choices : ['auto', 'panfrost', 'intel', 'freedreno'],
|
||||
description: 'List of Perfetto datasources to build. If this is set to `auto`, datasources that can not be build are skipped. Default: [`auto`]'
|
||||
)
|
||||
|
|
|
@ -0,0 +1,365 @@
|
|||
/*
|
||||
* Copyright © 2021 Google, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "fd_pps_driver.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <perfetto.h>
|
||||
|
||||
#include "pps/pps.h"
|
||||
#include "pps/pps_algorithm.h"
|
||||
|
||||
namespace pps
|
||||
{
|
||||
|
||||
uint64_t
|
||||
FreedrenoDriver::get_min_sampling_period_ns()
|
||||
{
|
||||
return 100000;
|
||||
}
|
||||
|
||||
/*
|
||||
TODO this sees like it would be largely the same for a5xx as well
|
||||
(ie. same countable names)..
|
||||
*/
|
||||
void
|
||||
FreedrenoDriver::setup_a6xx_counters()
|
||||
{
|
||||
/* TODO is there a reason to want more than one group? */
|
||||
CounterGroup group = {};
|
||||
group.name = "counters";
|
||||
groups.clear();
|
||||
counters.clear();
|
||||
countables.clear();
|
||||
enabled_counters.clear();
|
||||
groups.emplace_back(std::move(group));
|
||||
|
||||
/*
|
||||
* Create the countables that we'll be using.
|
||||
*/
|
||||
|
||||
auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT");
|
||||
auto PERF_CP_BUSY_CYCLES = countable("PERF_CP_BUSY_CYCLES");
|
||||
auto PERF_RB_3D_PIXELS = countable("PERF_RB_3D_PIXELS");
|
||||
auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS");
|
||||
auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS");
|
||||
auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES");
|
||||
auto PERF_SP_BUSY_CYCLES = countable("PERF_SP_BUSY_CYCLES");
|
||||
|
||||
/*
|
||||
* And then setup the derived counters that we are exporting to
|
||||
* pps based on the captured countable values
|
||||
*/
|
||||
|
||||
counter("GPU Frequency", Counter::Units::Hertz, [=]() {
|
||||
return PERF_CP_ALWAYS_COUNT / time;
|
||||
}
|
||||
);
|
||||
|
||||
counter("GPU % Utilization", Counter::Units::Percent, [=]() {
|
||||
return 100.0 * (PERF_CP_BUSY_CYCLES / time) / max_freq;
|
||||
}
|
||||
);
|
||||
|
||||
// This one is a bit of a guess, but seems plausible..
|
||||
counter("ALU / Fragment", Counter::Units::None, [=]() {
|
||||
return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
|
||||
PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / PERF_RB_3D_PIXELS;
|
||||
}
|
||||
);
|
||||
|
||||
counter("TP L1 Cache Misses", Counter::Units::None, [=]() {
|
||||
return PERF_TP_L1_CACHELINE_MISSES / time;
|
||||
}
|
||||
);
|
||||
|
||||
counter("Shader Core Utilization", Counter::Units::Percent, [=]() {
|
||||
return 100.0 * (PERF_SP_BUSY_CYCLES / time) / (max_freq * info.num_sp_cores);
|
||||
}
|
||||
);
|
||||
|
||||
// TODO add more.. see https://gpuinspector.dev/docs/gpu-counters/qualcomm
|
||||
// for what blob exposes
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate an submit the cmdstream to configure the counter/countable
|
||||
* muxing
|
||||
*/
|
||||
void
|
||||
FreedrenoDriver::configure_counters(bool reset, bool wait)
|
||||
{
|
||||
struct fd_submit *submit = fd_submit_new(pipe);
|
||||
enum fd_ringbuffer_flags flags =
|
||||
(enum fd_ringbuffer_flags)(FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
|
||||
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000, flags);
|
||||
|
||||
for (auto countable : countables)
|
||||
countable.configure(ring, reset);
|
||||
|
||||
struct fd_submit_fence fence = {};
|
||||
util_queue_fence_init(&fence.ready);
|
||||
|
||||
fd_submit_flush(submit, -1, &fence);
|
||||
|
||||
util_queue_fence_wait(&fence.ready);
|
||||
|
||||
fd_ringbuffer_del(ring);
|
||||
fd_submit_del(submit);
|
||||
|
||||
if (wait)
|
||||
fd_pipe_wait(pipe, &fence.fence);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the current counter values and record the time.
|
||||
*/
|
||||
void
|
||||
FreedrenoDriver::collect_countables()
|
||||
{
|
||||
last_dump_ts = perfetto::base::GetBootTimeNs().count();
|
||||
|
||||
for (auto countable : countables)
|
||||
countable.collect();
|
||||
}
|
||||
|
||||
bool
|
||||
FreedrenoDriver::init_perfcnt()
|
||||
{
|
||||
uint64_t val;
|
||||
|
||||
dev = fd_device_new(drm_device.fd);
|
||||
pipe = fd_pipe_new(dev, FD_PIPE_3D);
|
||||
|
||||
if (fd_pipe_get_param(pipe, FD_GPU_ID, &val)) {
|
||||
PERFETTO_FATAL("Could not get GPU_ID");
|
||||
return false;
|
||||
}
|
||||
gpu_id = val;
|
||||
|
||||
if (fd_pipe_get_param(pipe, FD_MAX_FREQ, &val)) {
|
||||
PERFETTO_FATAL("Could not get MAX_FREQ");
|
||||
return false;
|
||||
}
|
||||
max_freq = val;
|
||||
|
||||
perfcntrs = fd_perfcntrs(gpu_id, &num_perfcntrs);
|
||||
if (num_perfcntrs == 0) {
|
||||
PERFETTO_FATAL("No hw counters available");
|
||||
return false;
|
||||
}
|
||||
|
||||
assigned_counters.resize(num_perfcntrs);
|
||||
assigned_counters.assign(assigned_counters.size(), 0);
|
||||
|
||||
switch (gpu_id) {
|
||||
case 600 ... 699:
|
||||
setup_a6xx_counters();
|
||||
break;
|
||||
default:
|
||||
PERFETTO_FATAL("Unsupported GPU: a%03u", gpu_id);
|
||||
return false;
|
||||
}
|
||||
|
||||
state.resize(next_countable_id);
|
||||
|
||||
for (auto countable : countables)
|
||||
countable.resolve();
|
||||
|
||||
freedreno_dev_info_init(&info, gpu_id);
|
||||
|
||||
io = fd_dt_find_io();
|
||||
if (!io) {
|
||||
PERFETTO_FATAL("Could not map GPU I/O space");
|
||||
return false;
|
||||
}
|
||||
|
||||
configure_counters(true, true);
|
||||
collect_countables();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
FreedrenoDriver::enable_counter(const uint32_t counter_id)
|
||||
{
|
||||
enabled_counters.push_back(counters[counter_id]);
|
||||
}
|
||||
|
||||
void
|
||||
FreedrenoDriver::enable_all_counters()
|
||||
{
|
||||
enabled_counters.reserve(counters.size());
|
||||
for (auto &counter : counters) {
|
||||
enabled_counters.push_back(counter);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
FreedrenoDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */)
|
||||
{
|
||||
}
|
||||
|
||||
bool
|
||||
FreedrenoDriver::dump_perfcnt()
|
||||
{
|
||||
auto last_ts = last_dump_ts;
|
||||
|
||||
collect_countables();
|
||||
|
||||
auto elapsed_time_ns = last_dump_ts - last_ts;
|
||||
|
||||
time = (float)elapsed_time_ns / 1000000000.0;
|
||||
|
||||
// TODO we want to do this periodically to keep the GPU awake
|
||||
// (and to ensure we don't loose counter configuration due to
|
||||
// suspend/resume cycle), but we don't' need to do this every
|
||||
// time.. we probably just want to do this every 30-60ms..
|
||||
configure_counters(false, false);
|
||||
|
||||
last_capture_ts = last_dump_ts;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
uint64_t FreedrenoDriver::next()
|
||||
{
|
||||
auto ret = last_capture_ts;
|
||||
last_capture_ts = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Intentionally empty. */
void FreedrenoDriver::disable_perfcnt()
{
   /* The hw has no real "disable"; the only thing that can be changed is
    * which countables get muxed to which counters.
    */
}
|
||||
|
||||
/*
|
||||
* Countable
|
||||
*/
|
||||
|
||||
/* Create a countable with the given name, remember a copy of it in
 * countables, and hand the handle back to the caller.
 */
FreedrenoDriver::Countable
FreedrenoDriver::countable(std::string name)
{
   Countable created(this, name);

   countables.push_back(created);

   return created;
}
|
||||
|
||||
/* Takes the next free countable id from the driver and records the
 * owning driver and the countable's name.
 */
FreedrenoDriver::Countable::Countable(FreedrenoDriver *d, std::string name)
   : id(d->next_countable_id++),
     d(d),
     name(name)
{
}
|
||||
|
||||
/* Emit register writes on ring to configure counter/countable muxing: */
|
||||
void
|
||||
FreedrenoDriver::Countable::configure(struct fd_ringbuffer *ring, bool reset)
|
||||
{
|
||||
const struct fd_perfcntr_countable *countable = d->state[id].countable;
|
||||
const struct fd_perfcntr_counter *counter = d->state[id].counter;
|
||||
|
||||
OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
|
||||
|
||||
if (counter->enable && reset) {
|
||||
OUT_PKT4(ring, counter->enable, 1);
|
||||
OUT_RING(ring, 0);
|
||||
}
|
||||
|
||||
if (counter->clear && reset) {
|
||||
OUT_PKT4(ring, counter->clear, 1);
|
||||
OUT_RING(ring, 1);
|
||||
|
||||
OUT_PKT4(ring, counter->clear, 1);
|
||||
OUT_RING(ring, 0);
|
||||
}
|
||||
|
||||
OUT_PKT4(ring, counter->select_reg, 1);
|
||||
OUT_RING(ring, countable->selector);
|
||||
|
||||
if (counter->enable && reset) {
|
||||
OUT_PKT4(ring, counter->enable, 1);
|
||||
OUT_RING(ring, 1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Collect current counter value and calculate delta since last sample: */
|
||||
void
|
||||
FreedrenoDriver::Countable::collect()
|
||||
{
|
||||
const struct fd_perfcntr_counter *counter = d->state[id].counter;
|
||||
|
||||
d->state[id].last_value = d->state[id].value;
|
||||
|
||||
uint32_t *reg_lo = (uint32_t *)d->io + counter->counter_reg_lo;
|
||||
uint32_t *reg_hi = (uint32_t *)d->io + counter->counter_reg_hi;
|
||||
|
||||
uint32_t lo = *reg_lo;
|
||||
uint32_t hi = *reg_hi;
|
||||
|
||||
d->state[id].value = lo | ((uint64_t)hi << 32);
|
||||
}
|
||||
|
||||
/* Resolve the countable and assign next counter from it's group: */
|
||||
void
|
||||
FreedrenoDriver::Countable::resolve()
|
||||
{
|
||||
for (unsigned i = 0; i < d->num_perfcntrs; i++) {
|
||||
const struct fd_perfcntr_group *g = &d->perfcntrs[i];
|
||||
for (unsigned j = 0; j < g->num_countables; j++) {
|
||||
const struct fd_perfcntr_countable *c = &g->countables[j];
|
||||
if (name == c->name) {
|
||||
d->state[id].countable = c;
|
||||
|
||||
/* Assign a counter from the same group: */
|
||||
assert(d->assigned_counters[i] < g->num_counters);
|
||||
d->state[id].counter = &g->counters[d->assigned_counters[i]++];
|
||||
|
||||
std::cout << "Countable: " << name << ", group=" << g->name <<
|
||||
", counter=" << d->assigned_counters[i] - 1 << "\n";
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
unreachable("no such countable!");
|
||||
}
|
||||
|
||||
uint64_t
|
||||
FreedrenoDriver::Countable::get_value() const
|
||||
{
|
||||
return d->state[id].value - d->state[id].last_value;
|
||||
}
|
||||
|
||||
/*
|
||||
* DerivedCounter
|
||||
*/
|
||||
|
||||
/*
 * Takes the next free counter id from the driver, logs the new counter,
 * records its units and installs a getter that evaluates derive().
 */
FreedrenoDriver::DerivedCounter::DerivedCounter(FreedrenoDriver *d, std::string name,
                                                Counter::Units units,
                                                std::function<int64_t()> derive)
   : Counter(d->next_counter_id++, name, 0)
{
   std::cout << "DerivedCounter: " << name << ", id=" << id << "\n";

   this->units = units;

   /* The getter ignores its arguments; the derive function already has
    * everything it needs.
    */
   set_getter([derive](const Counter &, const Driver &) {
         return derive();
      }
   );
}
|
||||
|
||||
/* Create a derived counter, remember a copy of it in counters, and hand
 * it back to the caller.
 */
FreedrenoDriver::DerivedCounter
FreedrenoDriver::counter(std::string name, Counter::Units units,
                         std::function<int64_t()> derive)
{
   DerivedCounter derived(this, name, units, derive);

   counters.push_back(derived);

   return derived;
}
|
||||
|
||||
} // namespace pps
|
|
@ -0,0 +1,131 @@
|
|||
/*
|
||||
* Copyright © 2021 Google, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "pps/pps_driver.h"
|
||||
|
||||
#include "common/freedreno_dev_info.h"
|
||||
#include "drm/freedreno_drmif.h"
|
||||
#include "drm/freedreno_ringbuffer.h"
|
||||
#include "perfcntrs/freedreno_dt.h"
|
||||
#include "perfcntrs/freedreno_perfcntr.h"
|
||||
|
||||
namespace pps
|
||||
{
|
||||
|
||||
class FreedrenoDriver : public Driver
|
||||
{
|
||||
public:
|
||||
uint64_t get_min_sampling_period_ns() override;
|
||||
bool init_perfcnt() override;
|
||||
void enable_counter(uint32_t counter_id) override;
|
||||
void enable_all_counters() override;
|
||||
void enable_perfcnt(uint64_t sampling_period_ns) override;
|
||||
void disable_perfcnt() override;
|
||||
bool dump_perfcnt() override;
|
||||
uint64_t next() override;
|
||||
|
||||
private:
|
||||
struct fd_device *dev;
|
||||
struct fd_pipe *pipe;
|
||||
uint32_t gpu_id;
|
||||
uint32_t max_freq;
|
||||
uint32_t next_counter_id;
|
||||
uint32_t next_countable_id;
|
||||
uint64_t last_dump_ts = 0;
|
||||
uint64_t last_capture_ts;
|
||||
|
||||
struct freedreno_dev_info info;
|
||||
|
||||
/**
|
||||
* The memory mapped i/o space for counter readback:
|
||||
*/
|
||||
void *io;
|
||||
|
||||
const struct fd_perfcntr_group *perfcntrs;
|
||||
unsigned num_perfcntrs;
|
||||
|
||||
/**
|
||||
* The number of counters assigned per perfcntr group, the index
|
||||
* into this matches the index into perfcntrs
|
||||
*/
|
||||
std::vector<int> assigned_counters;
|
||||
|
||||
/*
|
||||
* Values that can be used by derived counters evaluation
|
||||
*/
|
||||
float time; /* time since last sample in fraction of second */
|
||||
// uint32_t cycles; /* the number of clock cycles since last sample */
|
||||
|
||||
void setup_a6xx_counters();
|
||||
|
||||
void configure_counters(bool reset, bool wait);
|
||||
void collect_countables();
|
||||
|
||||
/**
|
||||
* Split out countable mutable state from the class so that copy-
|
||||
* constructor does something sane when lambda derive function
|
||||
* tries to get the countable value.
|
||||
*/
|
||||
struct CountableState {
|
||||
uint64_t last_value, value;
|
||||
const struct fd_perfcntr_countable *countable;
|
||||
const struct fd_perfcntr_counter *counter;
|
||||
};
|
||||
|
||||
std::vector<struct CountableState> state;
|
||||
|
||||
/**
|
||||
* Performance counters on adreno consist of sets of counters in various
|
||||
* blocks of the GPU, where each counter can be can be muxed to collect
|
||||
* one of a set of countables.
|
||||
*
|
||||
* But the countables tend to be too low level to be directly useful to
|
||||
* visualize. Instead various combinations of countables are combined
|
||||
* with various formulas to derive the high level "Counter" value exposed
|
||||
* via gfx-pps.
|
||||
*
|
||||
* This class serves to decouple the logic of those formulas from the
|
||||
* details of collecting countable values.
|
||||
*/
|
||||
class Countable {
|
||||
public:
|
||||
Countable(FreedrenoDriver *d, std::string name);
|
||||
|
||||
operator int64_t() const { return get_value(); };
|
||||
|
||||
void configure(struct fd_ringbuffer *ring, bool reset);
|
||||
void collect();
|
||||
void resolve();
|
||||
|
||||
private:
|
||||
|
||||
uint64_t get_value() const;
|
||||
|
||||
uint32_t id;
|
||||
FreedrenoDriver *d;
|
||||
std::string name;
|
||||
};
|
||||
|
||||
Countable countable(std::string name);
|
||||
|
||||
std::vector<Countable> countables;
|
||||
|
||||
/**
|
||||
* A derived "Counter" (from pps's perspective)
|
||||
*/
|
||||
class DerivedCounter : public Counter {
|
||||
public:
|
||||
DerivedCounter(FreedrenoDriver *d, std::string name, Counter::Units units,
|
||||
std::function<int64_t()> derive);
|
||||
};
|
||||
|
||||
DerivedCounter counter(std::string name, Counter::Units units,
|
||||
std::function<int64_t()> derive);
|
||||
};
|
||||
|
||||
} // namespace pps
|
|
@ -0,0 +1,49 @@
|
|||
# Copyright © 2021 Collabora, Ltd.
|
||||
# Copyright © 2021 Google, Inc
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
# Static library holding the freedreno pps driver implementation.
pps_freedreno_lib = static_library(
  'freedreno-gpu',
  sources: ['fd_pps_driver.cc', 'fd_pps_driver.h', freedreno_xml_header_files],
  include_directories: [inc_tool, inc_src, inc_freedreno, inc_include],
  dependencies: [dep_libdrm, dep_perfetto],
  cpp_args: '-std=c++17'
)

# What consumers link against: the driver plus the freedreno libraries it
# uses.  Consumers are also compiled with -DPPS_FREEDRENO.
pps_freedreno_dep = declare_dependency(
  link_with: [
    pps_freedreno_lib,
    libfreedreno_common,
    libfreedreno_drm,
    libfreedreno_perfcntrs,
  ],
  dependencies: [idep_mesautil],
  include_directories: [inc_tool, inc_src],
  compile_args: ['-DPPS_FREEDRENO'],
)

# Register the datasource and the include paths its header needs.
pps_datasources += pps_freedreno_dep
pps_includes += [inc_include, inc_freedreno]
|
|
@ -50,6 +50,10 @@ subdir('fdl')
|
|||
subdir('perfcntrs')
|
||||
subdir('computerator')
|
||||
|
||||
if with_perfetto and (with_datasources.contains('freedreno') or with_datasources.contains('auto'))
|
||||
subdir('ds')
|
||||
endif
|
||||
|
||||
# Everything that depends on rnn requires (indirectly) libxml2:
|
||||
if dep_libxml2.found()
|
||||
subdir('rnn')
|
||||
|
|
|
@ -27,6 +27,7 @@ inc_amd_common = include_directories('amd/common')
|
|||
inc_amd_common_llvm = include_directories('amd/llvm')
|
||||
inc_tool = include_directories('tool')
|
||||
pps_datasources = []
|
||||
pps_includes = []
|
||||
|
||||
libglsl_util = static_library(
|
||||
'glsl_util',
|
||||
|
|
|
@ -19,7 +19,7 @@ pps_deps += pps_datasources
|
|||
lib_pps = static_library(
|
||||
'pps',
|
||||
sources: pps_sources,
|
||||
include_directories: [include_pps, inc_src],
|
||||
include_directories: [include_pps, inc_src, pps_includes],
|
||||
dependencies: pps_deps,
|
||||
cpp_args: '-std=c++17'
|
||||
)
|
||||
|
|
|
@ -13,6 +13,10 @@
|
|||
#include <iterator>
|
||||
#include <sstream>
|
||||
|
||||
#ifdef PPS_FREEDRENO
|
||||
#include "freedreno/ds/fd_pps_driver.h"
|
||||
#endif // PPS_FREEDRENO
|
||||
|
||||
#include "pps.h"
|
||||
#include "pps_algorithm.h"
|
||||
|
||||
|
@ -21,6 +25,11 @@ namespace pps
|
|||
std::unordered_map<std::string, std::unique_ptr<Driver>> create_supported_drivers()
|
||||
{
|
||||
std::unordered_map<std::string, std::unique_ptr<Driver>> map;
|
||||
|
||||
#ifdef PPS_FREEDRENO
|
||||
map.emplace("msm", std::make_unique<FreedrenoDriver>());
|
||||
#endif // PPS_FREEDRENO
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue