/*
 * Copyright © 2020-2021 Collabora, Ltd.
 * Author: Antonio Caggiano <antonio.caggiano@collabora.com>
 *
 * SPDX-License-Identifier: MIT
 */

#pragma once
#include <pps/pps_driver.h>
/// Forward declaration of a C struct with C language linkage. This header only
/// refers to it through a pointer (see IntelDriver::selected_query), so the
/// full definition is not required here.
extern "C" {
struct intel_perf_query_info;
}
namespace pps
{
/// Forward declaration: this header only stores a std::unique_ptr<IntelPerf>,
/// so the complete type is not needed at this point.
class IntelPerf;
/// @brief Variable length sequence of bytes generated by Intel Observation Architecture (OA)
struct PerfRecord {
   /// Timestamp in the GPU clock domain
   uint64_t timestamp;

   /// drm_i915_perf_record_header + report data
   std::vector<uint8_t> data;
};
/// @brief PPS Driver implementation for Intel graphics devices.
|
|
/// When sampling it may collect multiple perf-records at once. Each perf-record holds multiple
|
|
/// counter values. Those values are continuously incremented by the GPU. In order to get a delta,
|
|
/// the driver computes an _accumulation_ (`last_perf_record - previous_perf_record`).
|
|
/// For optimization purposes, it might ignore some perf-records, considering only those
|
|
/// perf-records close to the boundary of the sampling period range.
|
|
class IntelDriver : public Driver
|
|
{
|
|
public:
|
|
IntelDriver();
|
|
~IntelDriver();
|
|
|
|
uint64_t get_min_sampling_period_ns() override;
|
|
bool init_perfcnt() override;
|
|
void enable_counter(uint32_t counter_id) override;
|
|
void enable_all_counters() override;
|
|
void enable_perfcnt(uint64_t sampling_period_ns) override;
|
|
void disable_perfcnt() override;
|
|
bool dump_perfcnt() override;
|
|
uint64_t next() override;
|
|
uint32_t gpu_clock_id() const override;
|
|
uint64_t gpu_timestamp() const override;
|
|
|
|
private:
|
|
/// @brief Requests the next perf sample
|
|
/// @return The sample GPU timestamp
|
|
uint64_t gpu_next();
|
|
|
|
/// @param data Buffer of bytes to parse
|
|
/// @param byte_count Number of bytes to parse
|
|
/// @return A list of perf records parsed from raw data passed as input
|
|
std::vector<PerfRecord> parse_perf_records(const std::vector<uint8_t> &data, size_t byte_count);
|
|
|
|
/// @brief Reads data from the GPU metric set
|
|
void read_data_from_metric_set();
|
|
|
|
/// Sampling period in nanoseconds requested by the datasource
|
|
uint64_t sampling_period_ns = 0;
|
|
|
|
/// Last upper 32bits of the GPU timestamp in the parsed reports
|
|
uint64_t gpu_timestamp_udw = 0;
|
|
|
|
/// Keep track of the timestamp of the last sample generated (upper & lower
|
|
/// 32bits)
|
|
uint64_t last_gpu_timestamp = 0;
|
|
|
|
/// Data buffer used to store data read from the metric set
|
|
std::vector<uint8_t> metric_buffer = std::vector<uint8_t>(1024, 0);
|
|
/// Number of bytes read so far still un-parsed.
|
|
/// Reset once bytes from the metric buffer are parsed to perf records
|
|
size_t total_bytes_read = 0;
|
|
|
|
/// List of OA perf records read so far
|
|
std::vector<PerfRecord> records;
|
|
|
|
std::unique_ptr<IntelPerf> perf;
|
|
|
|
// Gpu clock ID used to correlate GPU/CPU timestamps
|
|
uint32_t clock_id = 0;
|
|
|
|
// Selected query
|
|
intel_perf_query_info *selected_query = nullptr;
|
|
};
} // namespace pps