pps: allow drivers to report timestamps in their own time domain
For this, each driver must:
- report its clock_id (if it has no particular clock, just default to the CPU boottime one)
- be able to sample its clock (gpu_timestamp())
The PPSDataSource will then emit timestamp correlation events in the trace, ensuring perfetto is able to display GPU & CPU events appropriately on its timeline.
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Antonio Caggiano <antonio.caggiano@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13831>
This commit is contained in:
parent
457dbb81f5
commit
8657fa6b86
|
@ -389,4 +389,16 @@ FreedrenoDriver::counter(std::string name, Counter::Units units,
|
||||||
return counter;
|
return counter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// @brief Reports the clock domain of the timestamps produced by this driver.
/// @return perfetto's BUILTIN_CLOCK_BOOTTIME, i.e. this driver uses the CPU boottime clock
uint32_t
|
||||||
|
FreedrenoDriver::gpu_clock_id() const
|
||||||
|
{
|
||||||
|
return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Samples the current timestamp in the clock reported by gpu_clock_id().
/// @return The count of perfetto::base::GetBootTimeNs(), since this driver uses
/// the CPU boottime clock rather than a GPU-specific one
uint64_t
|
||||||
|
FreedrenoDriver::gpu_timestamp() const
|
||||||
|
{
|
||||||
|
return perfetto::base::GetBootTimeNs().count();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace pps
|
} // namespace pps
|
||||||
|
|
|
@ -28,6 +28,8 @@ public:
|
||||||
void disable_perfcnt() override;
|
void disable_perfcnt() override;
|
||||||
bool dump_perfcnt() override;
|
bool dump_perfcnt() override;
|
||||||
uint64_t next() override;
|
uint64_t next() override;
|
||||||
|
uint32_t gpu_clock_id() const override;
|
||||||
|
uint64_t gpu_timestamp() const override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct fd_device *dev;
|
struct fd_device *dev;
|
||||||
|
|
|
@ -400,4 +400,14 @@ uint64_t IntelDriver::next()
|
||||||
return cpu_next();
|
return cpu_next();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// @brief Reports the clock domain of the timestamps produced by this driver.
/// @return perfetto's BUILTIN_CLOCK_BOOTTIME, i.e. this driver uses the CPU boottime clock
uint32_t IntelDriver::gpu_clock_id() const
|
||||||
|
{
|
||||||
|
return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Samples the current timestamp in the clock reported by gpu_clock_id().
/// @return The count of perfetto::base::GetBootTimeNs(), since this driver uses
/// the CPU boottime clock rather than a GPU-specific one
uint64_t IntelDriver::gpu_timestamp() const
|
||||||
|
{
|
||||||
|
return perfetto::base::GetBootTimeNs().count();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace pps
|
} // namespace pps
|
||||||
|
|
|
@ -51,7 +51,10 @@ class IntelDriver : public Driver
|
||||||
void disable_perfcnt() override;
|
void disable_perfcnt() override;
|
||||||
bool dump_perfcnt() override;
|
bool dump_perfcnt() override;
|
||||||
uint64_t next() override;
|
uint64_t next() override;
|
||||||
|
uint32_t gpu_clock_id() const override;
|
||||||
|
uint64_t gpu_timestamp() const override;
|
||||||
|
|
||||||
|
private:
|
||||||
/// @brief Requests the next perf sample
|
/// @brief Requests the next perf sample
|
||||||
/// @return The sample GPU timestamp
|
/// @return The sample GPU timestamp
|
||||||
uint32_t gpu_next();
|
uint32_t gpu_next();
|
||||||
|
|
|
@ -157,4 +157,14 @@ void PanfrostDriver::disable_perfcnt()
|
||||||
enabled_counters.clear();
|
enabled_counters.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// @brief Reports the clock domain of the timestamps produced by this driver.
/// @return perfetto's BUILTIN_CLOCK_BOOTTIME, i.e. this driver uses the CPU boottime clock
uint32_t PanfrostDriver::gpu_clock_id() const
|
||||||
|
{
|
||||||
|
return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Samples the current timestamp in the clock reported by gpu_clock_id().
/// @return The count of perfetto::base::GetBootTimeNs(), since this driver uses
/// the CPU boottime clock rather than a GPU-specific one
uint64_t PanfrostDriver::gpu_timestamp() const
|
||||||
|
{
|
||||||
|
return perfetto::base::GetBootTimeNs().count();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace pps
|
} // namespace pps
|
||||||
|
|
|
@ -41,6 +41,8 @@ class PanfrostDriver : public Driver
|
||||||
void disable_perfcnt() override;
|
void disable_perfcnt() override;
|
||||||
bool dump_perfcnt() override;
|
bool dump_perfcnt() override;
|
||||||
uint64_t next() override;
|
uint64_t next() override;
|
||||||
|
uint32_t gpu_clock_id() const override;
|
||||||
|
uint64_t gpu_timestamp() const override;
|
||||||
|
|
||||||
uint64_t last_dump_ts = 0;
|
uint64_t last_dump_ts = 0;
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,8 @@
|
||||||
// Minimum supported sampling period in nanoseconds
|
// Minimum supported sampling period in nanoseconds
|
||||||
#define MIN_SAMPLING_PERIOD_NS 50000
|
#define MIN_SAMPLING_PERIOD_NS 50000
|
||||||
|
|
||||||
|
#define CORRELATION_TIMESTAMP_PERIOD (1000000000ull)
|
||||||
|
|
||||||
namespace pps
|
namespace pps
|
||||||
{
|
{
|
||||||
static std::string driver_name;
|
static std::string driver_name;
|
||||||
|
@ -232,11 +234,37 @@ void add_samples(perfetto::protos::pbzero::GpuCounterEvent &event, const Driver
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// @brief Appends a CPU/GPU timestamp pair to a clock snapshot so that
/// timestamps in the driver's clock can be related to the CPU boottime clock.
/// @param event ClockSnapshot message receiving the two clock entries
/// @param driver Driver queried for its clock id and current GPU timestamp
/// Nothing is appended when the driver's clock id is BUILTIN_CLOCK_BOOTTIME.
/// NOTE(review): the callers in this file create the ClockSnapshot before
/// calling this function, so the early return leaves a snapshot with no
/// clocks in the packet - confirm the trace consumer accepts that.
void add_timestamp(perfetto::protos::pbzero::ClockSnapshot *event, const Driver *driver)
|
||||||
|
{
|
||||||
|
uint32_t gpu_clock_id = driver->gpu_clock_id();
|
||||||
|
// The driver already reports timestamps in the CPU boottime clock: no correlation to emit
if (perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME == gpu_clock_id)
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Send a correlation event between GPU & CPU timestamps
|
||||||
|
// The two clocks are sampled back to back, CPU first, and paired in the snapshot
uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
|
||||||
|
uint64_t gpu_ts = driver->gpu_timestamp();
|
||||||
|
|
||||||
|
// First entry: the CPU boottime clock
{
|
||||||
|
auto clock = event->add_clocks();
|
||||||
|
|
||||||
|
clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
|
||||||
|
clock->set_timestamp(cpu_ts);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second entry: the driver's own clock
{
|
||||||
|
auto clock = event->add_clocks();
|
||||||
|
|
||||||
|
clock->set_clock_id(gpu_clock_id);
|
||||||
|
clock->set_timestamp(gpu_ts);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void GpuDataSource::trace(TraceContext &ctx)
|
void GpuDataSource::trace(TraceContext &ctx)
|
||||||
{
|
{
|
||||||
using namespace perfetto::protos::pbzero;
|
using namespace perfetto::protos::pbzero;
|
||||||
|
|
||||||
if (auto state = ctx.GetIncrementalState(); state->was_cleared) {
|
auto state = ctx.GetIncrementalState();
|
||||||
|
if (state->was_cleared) {
|
||||||
// Mark any incremental state before this point invalid
|
// Mark any incremental state before this point invalid
|
||||||
{
|
{
|
||||||
auto packet = ctx.NewTracePacket();
|
auto packet = ctx.NewTracePacket();
|
||||||
|
@ -248,14 +276,23 @@ void GpuDataSource::trace(TraceContext &ctx)
|
||||||
descriptor_timestamp = perfetto::base::GetBootTimeNs().count();
|
descriptor_timestamp = perfetto::base::GetBootTimeNs().count();
|
||||||
packet->set_timestamp(descriptor_timestamp);
|
packet->set_timestamp(descriptor_timestamp);
|
||||||
|
|
||||||
auto event = packet->set_gpu_counter_event();
|
{
|
||||||
event->set_gpu_id(driver->drm_device.gpu_num);
|
auto event = packet->set_gpu_counter_event();
|
||||||
|
event->set_gpu_id(driver->drm_device.gpu_num);
|
||||||
|
|
||||||
auto &groups = driver->groups;
|
auto &groups = driver->groups;
|
||||||
auto &counters = driver->enabled_counters;
|
auto &counters = driver->enabled_counters;
|
||||||
PPS_LOG("Sending counter descriptors");
|
PPS_LOG("Sending counter descriptors");
|
||||||
add_descriptors(event, groups, counters, *driver);
|
add_descriptors(event, groups, counters, *driver);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
last_correlation_timestamp = perfetto::base::GetBootTimeNs().count();
|
||||||
|
auto event = packet->set_clock_snapshot();
|
||||||
|
add_timestamp(event, driver);
|
||||||
|
}
|
||||||
|
|
||||||
|
descriptor_gpu_timestamp = driver->gpu_timestamp();
|
||||||
state->was_cleared = false;
|
state->was_cleared = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -272,15 +309,16 @@ void GpuDataSource::trace(TraceContext &ctx)
|
||||||
sched_setscheduler(0, sched_policy, &priority_param);
|
sched_setscheduler(0, sched_policy, &priority_param);
|
||||||
|
|
||||||
if (driver->dump_perfcnt()) {
|
if (driver->dump_perfcnt()) {
|
||||||
while (auto timestamp = driver->next()) {
|
while (auto gpu_timestamp = driver->next()) {
|
||||||
if (timestamp <= descriptor_timestamp) {
|
if (gpu_timestamp <= descriptor_gpu_timestamp) {
|
||||||
// Do not send counter values before counter descriptors
|
// Do not send counter values before counter descriptors
|
||||||
PPS_LOG_ERROR("Skipping counter values coming before descriptors");
|
PPS_LOG_ERROR("Skipping counter values coming before descriptors");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto packet = ctx.NewTracePacket();
|
auto packet = ctx.NewTracePacket();
|
||||||
packet->set_timestamp(timestamp);
|
packet->set_timestamp_clock_id(driver->gpu_clock_id());
|
||||||
|
packet->set_timestamp(gpu_timestamp);
|
||||||
|
|
||||||
auto event = packet->set_gpu_counter_event();
|
auto event = packet->set_gpu_counter_event();
|
||||||
event->set_gpu_id(driver->drm_device.gpu_num);
|
event->set_gpu_id(driver->drm_device.gpu_num);
|
||||||
|
@ -289,6 +327,14 @@ void GpuDataSource::trace(TraceContext &ctx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
|
||||||
|
if ((cpu_ts - last_correlation_timestamp) > CORRELATION_TIMESTAMP_PERIOD) {
|
||||||
|
auto packet = ctx.NewTracePacket();
|
||||||
|
auto event = packet->set_clock_snapshot();
|
||||||
|
add_timestamp(event, driver);
|
||||||
|
last_correlation_timestamp = cpu_ts;
|
||||||
|
}
|
||||||
|
|
||||||
// Reset normal scheduler
|
// Reset normal scheduler
|
||||||
sched_setscheduler(0, prev_sched_policy, &prev_priority_param);
|
sched_setscheduler(0, prev_sched_policy, &prev_priority_param);
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,12 +53,18 @@ class GpuDataSource : public perfetto::DataSource<GpuDataSource, GpuDataSourceTr
|
||||||
/// Used to check whether the datasource is quick enough
|
/// Used to check whether the datasource is quick enough
|
||||||
std::chrono::nanoseconds time_to_trace;
|
std::chrono::nanoseconds time_to_trace;
|
||||||
|
|
||||||
|
/// Last CPU timestamp at which we correlated CPU/GPU timestamps
|
||||||
|
uint64_t last_correlation_timestamp = 0;
|
||||||
|
|
||||||
/// A data source supports one driver at a time, but if you need more
|
/// A data source supports one driver at a time, but if you need more
|
||||||
/// than one gpu datasource you can just run another producer
|
/// than one gpu datasource you can just run another producer
|
||||||
Driver *driver = nullptr;
|
Driver *driver = nullptr;
|
||||||
|
|
||||||
/// Timestamp of packet sent with counter descriptors
|
/// CPU timestamp of packet sent with counter descriptors
|
||||||
uint64_t descriptor_timestamp = 0;
|
uint64_t descriptor_timestamp = 0;
|
||||||
|
|
||||||
|
/// GPU timestamp of packet sent with counter descriptors
|
||||||
|
uint64_t descriptor_gpu_timestamp = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace pps
|
} // namespace pps
|
||||||
|
|
|
@ -72,9 +72,15 @@ class Driver
|
||||||
|
|
||||||
/// @brief After dumping performance counters, with this function you can iterate
|
/// @brief After dumping performance counters, with this function you can iterate
|
||||||
/// through the samples collected.
|
/// through the samples collected.
|
||||||
/// @return The CPU timestamp associated to current sample, or 0 if there are no more samples
|
/// @return The GPU timestamp associated to current sample, or 0 if there are no more samples
|
||||||
virtual uint64_t next() = 0;
|
virtual uint64_t next() = 0;
|
||||||
|
|
||||||
|
/// Clock ID in which the values returned by gpu_timestamp() belong
|
||||||
|
virtual uint32_t gpu_clock_id() const = 0;
|
||||||
|
|
||||||
|
/// Sample a timestamp from the GPU
|
||||||
|
virtual uint64_t gpu_timestamp() const = 0;
|
||||||
|
|
||||||
DrmDevice drm_device;
|
DrmDevice drm_device;
|
||||||
|
|
||||||
/// List of counter groups
|
/// List of counter groups
|
||||||
|
|
Loading…
Reference in New Issue