pps: allow drivers to report timestamps in their own time domain
To support this, each driver must:
- report its clock ID via gpu_clock_id() (drivers with no dedicated GPU clock simply default to the CPU boottime clock);
- be able to sample its clock via gpu_timestamp().
The PPS data source then emits timestamp correlation events into the trace, so that perfetto can place GPU and CPU events correctly on its timeline. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Antonio Caggiano <antonio.caggiano@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13831>
This commit is contained in:
parent
457dbb81f5
commit
8657fa6b86
|
@ -389,4 +389,16 @@ FreedrenoDriver::counter(std::string name, Counter::Units units,
|
|||
return counter;
|
||||
}
|
||||
|
||||
// Clock ID for the values returned by gpu_timestamp().
// This driver reports perfetto's builtin BOOTTIME clock, i.e. it does not
// declare a GPU-specific time domain.
uint32_t
|
||||
FreedrenoDriver::gpu_clock_id() const
|
||||
{
|
||||
return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME;
|
||||
}
|
||||
|
||||
// Samples the clock identified by gpu_clock_id().
// Returns the tick count of perfetto::base::GetBootTimeNs(), matching the
// BOOTTIME clock ID this driver reports.
uint64_t
|
||||
FreedrenoDriver::gpu_timestamp() const
|
||||
{
|
||||
return perfetto::base::GetBootTimeNs().count();
|
||||
}
|
||||
|
||||
} // namespace pps
|
||||
|
|
|
@ -28,6 +28,8 @@ public:
|
|||
void disable_perfcnt() override;
|
||||
bool dump_perfcnt() override;
|
||||
uint64_t next() override;
|
||||
uint32_t gpu_clock_id() const override;
|
||||
uint64_t gpu_timestamp() const override;
|
||||
|
||||
private:
|
||||
struct fd_device *dev;
|
||||
|
|
|
@ -400,4 +400,14 @@ uint64_t IntelDriver::next()
|
|||
return cpu_next();
|
||||
}
|
||||
|
||||
// Clock ID for the values returned by gpu_timestamp().
// This driver reports perfetto's builtin BOOTTIME clock, i.e. it does not
// declare a GPU-specific time domain.
uint32_t IntelDriver::gpu_clock_id() const
|
||||
{
|
||||
return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME;
|
||||
}
|
||||
|
||||
// Samples the clock identified by gpu_clock_id().
// Returns the tick count of perfetto::base::GetBootTimeNs(), matching the
// BOOTTIME clock ID this driver reports.
uint64_t IntelDriver::gpu_timestamp() const
|
||||
{
|
||||
return perfetto::base::GetBootTimeNs().count();
|
||||
}
|
||||
|
||||
} // namespace pps
|
||||
|
|
|
@ -51,7 +51,10 @@ class IntelDriver : public Driver
|
|||
void disable_perfcnt() override;
|
||||
bool dump_perfcnt() override;
|
||||
uint64_t next() override;
|
||||
uint32_t gpu_clock_id() const override;
|
||||
uint64_t gpu_timestamp() const override;
|
||||
|
||||
private:
|
||||
/// @brief Requests the next perf sample
|
||||
/// @return The sample GPU timestamp
|
||||
uint32_t gpu_next();
|
||||
|
|
|
@ -157,4 +157,14 @@ void PanfrostDriver::disable_perfcnt()
|
|||
enabled_counters.clear();
|
||||
}
|
||||
|
||||
// Clock ID for the values returned by gpu_timestamp().
// This driver reports perfetto's builtin BOOTTIME clock, i.e. it does not
// declare a GPU-specific time domain.
uint32_t PanfrostDriver::gpu_clock_id() const
|
||||
{
|
||||
return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME;
|
||||
}
|
||||
|
||||
// Samples the clock identified by gpu_clock_id().
// Returns the tick count of perfetto::base::GetBootTimeNs(), matching the
// BOOTTIME clock ID this driver reports.
uint64_t PanfrostDriver::gpu_timestamp() const
|
||||
{
|
||||
return perfetto::base::GetBootTimeNs().count();
|
||||
}
|
||||
|
||||
} // namespace pps
|
||||
|
|
|
@ -41,6 +41,8 @@ class PanfrostDriver : public Driver
|
|||
void disable_perfcnt() override;
|
||||
bool dump_perfcnt() override;
|
||||
uint64_t next() override;
|
||||
uint32_t gpu_clock_id() const override;
|
||||
uint64_t gpu_timestamp() const override;
|
||||
|
||||
uint64_t last_dump_ts = 0;
|
||||
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
// Minimum supported sampling period in nanoseconds
|
||||
#define MIN_SAMPLING_PERIOD_NS 50000
|
||||
|
||||
#define CORRELATION_TIMESTAMP_PERIOD (1000000000ull)
|
||||
|
||||
namespace pps
|
||||
{
|
||||
static std::string driver_name;
|
||||
|
@ -232,11 +234,37 @@ void add_samples(perfetto::protos::pbzero::GpuCounterEvent &event, const Driver
|
|||
}
|
||||
}
|
||||
|
||||
// Fills a ClockSnapshot with a CPU/GPU timestamp pair for the given driver.
//
// event:  clock snapshot message that receives the two clock entries.
// driver: source of the GPU clock ID and of the GPU timestamp sample.
//
// If the driver reports BUILTIN_CLOCK_BOOTTIME as its clock ID, the function
// returns without adding anything to the snapshot (presumably because the
// driver's timestamps are then already in the CPU boottime domain).
// Otherwise it samples the CPU boottime first, then the GPU clock, and adds
// them as two clocks: BOOTTIME with the CPU value, and the driver's clock ID
// with the GPU value.
void add_timestamp(perfetto::protos::pbzero::ClockSnapshot *event, const Driver *driver)
|
||||
{
|
||||
uint32_t gpu_clock_id = driver->gpu_clock_id();
|
||||
if (perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME == gpu_clock_id)
|
||||
return;
|
||||
|
||||
// Send a correlation event between GPU & CPU timestamps
|
||||
uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
|
||||
uint64_t gpu_ts = driver->gpu_timestamp();
|
||||
|
||||
{
|
||||
auto clock = event->add_clocks();
|
||||
|
||||
clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
|
||||
clock->set_timestamp(cpu_ts);
|
||||
}
|
||||
|
||||
{
|
||||
auto clock = event->add_clocks();
|
||||
|
||||
clock->set_clock_id(gpu_clock_id);
|
||||
clock->set_timestamp(gpu_ts);
|
||||
}
|
||||
}
|
||||
|
||||
void GpuDataSource::trace(TraceContext &ctx)
|
||||
{
|
||||
using namespace perfetto::protos::pbzero;
|
||||
|
||||
if (auto state = ctx.GetIncrementalState(); state->was_cleared) {
|
||||
auto state = ctx.GetIncrementalState();
|
||||
if (state->was_cleared) {
|
||||
// Mark any incremental state before this point invalid
|
||||
{
|
||||
auto packet = ctx.NewTracePacket();
|
||||
|
@ -248,14 +276,23 @@ void GpuDataSource::trace(TraceContext &ctx)
|
|||
descriptor_timestamp = perfetto::base::GetBootTimeNs().count();
|
||||
packet->set_timestamp(descriptor_timestamp);
|
||||
|
||||
auto event = packet->set_gpu_counter_event();
|
||||
event->set_gpu_id(driver->drm_device.gpu_num);
|
||||
{
|
||||
auto event = packet->set_gpu_counter_event();
|
||||
event->set_gpu_id(driver->drm_device.gpu_num);
|
||||
|
||||
auto &groups = driver->groups;
|
||||
auto &counters = driver->enabled_counters;
|
||||
PPS_LOG("Sending counter descriptors");
|
||||
add_descriptors(event, groups, counters, *driver);
|
||||
auto &groups = driver->groups;
|
||||
auto &counters = driver->enabled_counters;
|
||||
PPS_LOG("Sending counter descriptors");
|
||||
add_descriptors(event, groups, counters, *driver);
|
||||
}
|
||||
|
||||
{
|
||||
last_correlation_timestamp = perfetto::base::GetBootTimeNs().count();
|
||||
auto event = packet->set_clock_snapshot();
|
||||
add_timestamp(event, driver);
|
||||
}
|
||||
|
||||
descriptor_gpu_timestamp = driver->gpu_timestamp();
|
||||
state->was_cleared = false;
|
||||
}
|
||||
|
||||
|
@ -272,15 +309,16 @@ void GpuDataSource::trace(TraceContext &ctx)
|
|||
sched_setscheduler(0, sched_policy, &priority_param);
|
||||
|
||||
if (driver->dump_perfcnt()) {
|
||||
while (auto timestamp = driver->next()) {
|
||||
if (timestamp <= descriptor_timestamp) {
|
||||
while (auto gpu_timestamp = driver->next()) {
|
||||
if (gpu_timestamp <= descriptor_gpu_timestamp) {
|
||||
// Do not send counter values before counter descriptors
|
||||
PPS_LOG_ERROR("Skipping counter values coming before descriptors");
|
||||
continue;
|
||||
}
|
||||
|
||||
auto packet = ctx.NewTracePacket();
|
||||
packet->set_timestamp(timestamp);
|
||||
packet->set_timestamp_clock_id(driver->gpu_clock_id());
|
||||
packet->set_timestamp(gpu_timestamp);
|
||||
|
||||
auto event = packet->set_gpu_counter_event();
|
||||
event->set_gpu_id(driver->drm_device.gpu_num);
|
||||
|
@ -289,6 +327,14 @@ void GpuDataSource::trace(TraceContext &ctx)
|
|||
}
|
||||
}
|
||||
|
||||
uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
|
||||
if ((cpu_ts - last_correlation_timestamp) > CORRELATION_TIMESTAMP_PERIOD) {
|
||||
auto packet = ctx.NewTracePacket();
|
||||
auto event = packet->set_clock_snapshot();
|
||||
add_timestamp(event, driver);
|
||||
last_correlation_timestamp = cpu_ts;
|
||||
}
|
||||
|
||||
// Reset normal scheduler
|
||||
sched_setscheduler(0, prev_sched_policy, &prev_priority_param);
|
||||
}
|
||||
|
|
|
@ -53,12 +53,18 @@ class GpuDataSource : public perfetto::DataSource<GpuDataSource, GpuDataSourceTr
|
|||
/// Used to check whether the datasource is quick enough
|
||||
std::chrono::nanoseconds time_to_trace;
|
||||
|
||||
/// Last CPU timestamp at which we correlated CPU/GPU timestamps
|
||||
uint64_t last_correlation_timestamp = 0;
|
||||
|
||||
/// A data source supports one driver at a time, but if you need more
|
||||
/// than one gpu datasource you can just run another producer
|
||||
Driver *driver = nullptr;
|
||||
|
||||
/// Timestamp of packet sent with counter descriptors
|
||||
/// CPU timestamp of packet sent with counter descriptors
|
||||
uint64_t descriptor_timestamp = 0;
|
||||
|
||||
/// GPU timestamp of packet sent with counter descriptors
|
||||
uint64_t descriptor_gpu_timestamp = 0;
|
||||
};
|
||||
|
||||
} // namespace pps
|
||||
|
|
|
@ -72,9 +72,15 @@ class Driver
|
|||
|
||||
/// @brief After dumping performance counters, with this function you can iterate
|
||||
/// through the samples collected.
|
||||
/// @return The CPU timestamp associated to current sample, or 0 if there are no more samples
|
||||
/// @return The GPU timestamp associated to current sample, or 0 if there are no more samples
|
||||
virtual uint64_t next() = 0;
|
||||
|
||||
/// Clock ID in which the values returned by gpu_timestamp() belong
|
||||
virtual uint32_t gpu_clock_id() const = 0;
|
||||
|
||||
/// Sample a timestamp from the GPU
|
||||
virtual uint64_t gpu_timestamp() const = 0;
|
||||
|
||||
DrmDevice drm_device;
|
||||
|
||||
/// List of counter groups
|
||||
|
|
Loading…
Reference in New Issue