307 lines
10 KiB
C
307 lines
10 KiB
C
/*
|
|
* Copyright © 2020 Google, Inc.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
#ifndef _U_TRACE_H
|
|
#define _U_TRACE_H
|
|
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
|
|
#include "util/u_queue.h"
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/* A trace mechanism (very) loosely inspired by the linux kernel tracepoint
|
|
* mechanism, in that it allows for defining driver specific (or common)
|
|
* tracepoints, which generate 'trace_$name()' functions that can be
|
|
* called at various points in commandstream emit.
|
|
*
|
|
* Currently a printf backend is implemented, but the expectation is to
|
|
* also implement a perfetto backend for shipping out traces to a tool like
|
|
* AGI.
|
|
*
|
|
* Notable differences:
|
|
*
|
|
* - GPU timestamps! A driver provided callback is used to emit timestamps
|
|
* to a buffer. At a later point in time (when stalling to wait for the
|
|
* GPU is not required), the timestamps are re-united with the trace
|
|
* payload. This makes the trace mechanism suitable for profiling.
|
|
*
|
|
* - Instead of a systemwide trace ringbuffer, buffering of un-retired
|
|
* tracepoints is split into two stages. Traces are emitted to a
|
|
* 'u_trace' instance, and at a later time flushed to a 'u_trace_context'
|
|
* instance. This avoids the requirement that commandstream containing
|
|
* tracepoints is emitted in the same order as it is generated.
|
|
*
|
|
* If the hw has multiple parallel "engines" (for example, 3d/blit/compute)
|
|
* then a `u_trace_context` per-engine should be used.
|
|
*
|
|
* - Unlike kernel tracepoints, u_trace tracepoints are defined in py
|
|
* from which header and src files are generated. Since we already have
|
|
* a build dependency on python+mako, this gives more flexibility than
|
|
* clunky preprocessor macro magic.
|
|
*
|
|
*/
|
|
|
|
struct u_trace_context;
|
|
struct u_trace;
|
|
struct u_trace_chunk;
|
|
struct u_trace_printer;
|
|
|
|
/**
 * Reserved timestamp value meaning "nothing was captured for this trace;
 * reuse the timestamp of the trace before it".
 *
 * Because zero is reserved for this purpose, a genuine timestamp of zero
 * cannot be told apart from "no timestamp".
 */
#define U_TRACE_NO_TIMESTAMP ((uint64_t)0)
|
|
|
|
/**
 * Callback, supplied by the driver, that creates a timestamp buffer.  The
 * buffer it returns is the one later read through the u_trace_read_ts
 * callback.
 *
 * NOTE(review): timestamps_count is presumably the number of timestamp
 * slots the buffer has to hold -- confirm against the callers.
 */
typedef void *(*u_trace_create_ts_buffer)(struct u_trace_context *utctx,
                                          uint32_t timestamps_count);
|
|
|
|
/**
 * Callback, supplied by the driver, that deletes a timestamp buffer.
 *
 * NOTE(review): 'timestamps' is presumably a buffer obtained from the
 * u_trace_create_ts_buffer callback -- confirm against the callers.
 */
typedef void (*u_trace_delete_ts_buffer)(struct u_trace_context *utctx,
                                         void *timestamps);
|
|
|
|
/**
 * Callback, supplied by the driver, that emits commands into the specified
 * command stream ('cs') so that a 64b timestamp gets captured into slot
 * 'idx' of the specified timestamps buffer.
 *
 * The hw counter recorded by the driver should tick at a fixed rate, even
 * while the GPU frequency changes.  Whatever source backs GL_TIMESTAMP
 * queries should be appropriate.
 *
 * NOTE(review): end_of_pipe presumably selects a capture at the end of the
 * GPU pipeline rather than at the top -- confirm with the drivers.
 */
typedef void (*u_trace_record_ts)(struct u_trace *ut, void *cs,
                                  void *timestamps, unsigned idx,
                                  bool end_of_pipe);
|
|
|
|
/**
 * Callback, supplied by the driver, that reads back a timestamp recorded
 * earlier.
 *
 * When needed, the callback should block until the GPU has finished
 * writing the timestamps back.  Timestamps are read back in order, so
 * synchronizing only when idx==0 is safe.
 *
 * flush_data is the data the driver handed to u_trace_flush.
 *
 * The value returned should be expressed in nanoseconds, using the same
 * timebase as GL_TIMESTAMP queries.
 *
 * Returning the special U_TRACE_NO_TIMESTAMP value tells u_trace that no
 * timestamp was captured and that the timestamp of the previous trace is
 * to be re-used.  (This is not allowed for the first trace in the u_trace
 * buf.)  It lets a driver notice when several tracepoints were emitted
 * with no other cmdstream in between, and skip pointlessly capturing the
 * same timestamp several times in a row.
 */
typedef uint64_t (*u_trace_read_ts)(struct u_trace_context *utctx,
                                    void *timestamps, unsigned idx,
                                    void *flush_data);
|
|
|
|
/**
 * Callback, supplied by the driver, that deletes flush data (the
 * flush_data pointer the driver passed to u_trace_flush).
 */
typedef void (*u_trace_delete_flush_data)(struct u_trace_context *utctx,
                                          void *flush_data);
|
|
|
|
/**
|
|
* The trace context provides tracking for "in-flight" traces, once the
|
|
* cmdstream that records timestamps has been flushed.
|
|
*/
|
|
struct u_trace_context {
|
|
void *pctx;
|
|
|
|
u_trace_create_ts_buffer create_timestamp_buffer;
|
|
u_trace_delete_ts_buffer delete_timestamp_buffer;
|
|
u_trace_record_ts record_timestamp;
|
|
u_trace_read_ts read_timestamp;
|
|
u_trace_delete_flush_data delete_flush_data;
|
|
|
|
FILE *out;
|
|
struct u_trace_printer *out_printer;
|
|
|
|
/* Once u_trace_flush() is called u_trace_chunk's are queued up to
|
|
* render tracepoints on a queue. The per-chunk queue jobs block until
|
|
* timestamps are available.
|
|
*/
|
|
struct util_queue queue;
|
|
|
|
#ifdef HAVE_PERFETTO
|
|
/* node in global list of trace contexts. */
|
|
struct list_head node;
|
|
#endif
|
|
|
|
/* State to accumulate time across N chunks associated with a single
|
|
* batch (u_trace).
|
|
*/
|
|
uint64_t last_time_ns;
|
|
uint64_t first_time_ns;
|
|
|
|
uint32_t frame_nr;
|
|
uint32_t batch_nr;
|
|
uint32_t event_nr;
|
|
bool start_of_frame;
|
|
|
|
/* list of unprocessed trace chunks in fifo order: */
|
|
struct list_head flushed_trace_chunks;
|
|
};
|
|
|
|
/**
|
|
* The u_trace ptr is passed as the first arg to generated tracepoints.
|
|
* It provides buffering for tracepoint payload until the corresponding
|
|
* driver cmdstream containing the emitted commands to capture is
|
|
* flushed.
|
|
*
|
|
* Individual tracepoints emitted to u_trace are expected to be "executed"
|
|
* (ie. timestamp captured) in FIFO order with respect to other tracepoints
|
|
* emitted to the same u_trace. But the order WRT other u_trace instances
|
|
* is undefined util u_trace_flush().
|
|
*/
|
|
struct u_trace {
|
|
struct u_trace_context *utctx;
|
|
|
|
struct list_head trace_chunks; /* list of unflushed trace chunks in fifo order */
|
|
|
|
bool enabled;
|
|
};
|
|
|
|
void u_trace_context_init(struct u_trace_context *utctx,
|
|
void *pctx,
|
|
u_trace_create_ts_buffer create_timestamp_buffer,
|
|
u_trace_delete_ts_buffer delete_timestamp_buffer,
|
|
u_trace_record_ts record_timestamp,
|
|
u_trace_read_ts read_timestamp,
|
|
u_trace_delete_flush_data delete_flush_data);
|
|
void u_trace_context_fini(struct u_trace_context *utctx);
|
|
|
|
/**
 * Trigger processing of the traces that u_trace_flush() has previously
 * flushed to this trace-context.
 *
 * The usual place to call this is the driver's pctx->flush().
 *
 * NOTE(review): 'eof' presumably signals end-of-frame -- confirm against
 * the implementation.
 */
void u_trace_context_process(struct u_trace_context *utctx, bool eof);
|
|
|
|
/* Set up / tear down a u_trace.
 * NOTE(review): u_trace_init() presumably binds 'ut' to 'utctx' (struct
 * u_trace has a utctx member) -- confirm against the implementation.
 */
void u_trace_init(struct u_trace *ut, struct u_trace_context *utctx);
void u_trace_fini(struct u_trace *ut);

/* NOTE(review): presumably reports whether 'ut' currently holds any
 * tracepoints -- confirm against the implementation.
 */
bool u_trace_has_points(struct u_trace *ut);
|
|
|
|
/* Iterator over the tracepoints of a u_trace; passed by value.
 * NOTE(review): presumably designates event 'event_idx' inside 'chunk' of
 * 'ut' -- confirm against the implementation.
 */
struct u_trace_iterator {
   struct u_trace *ut;
   struct u_trace_chunk *chunk;
   uint32_t event_idx;
};
|
|
|
|
/* Iterator helpers.
 * NOTE(review): begin/end presumably yield the first and one-past-the-last
 * tracepoint positions of 'ut', and equal compares two positions --
 * confirm against the implementation.
 */
struct u_trace_iterator u_trace_begin_iterator(struct u_trace *ut);

struct u_trace_iterator u_trace_end_iterator(struct u_trace *ut);

bool u_trace_iterator_equal(struct u_trace_iterator a,
                            struct u_trace_iterator b);
|
|
|
|
/* Callback type accepted by u_trace_clone_append().
 * NOTE(review): presumably copies 'count' timestamps from 'ts_from'
 * (starting at from_offset) to 'ts_to' (starting at to_offset) by emitting
 * commands into 'cmdstream' -- confirm units of the offsets with drivers.
 */
typedef void (*u_trace_copy_ts_buffer)(struct u_trace_context *utctx,
                                       void *cmdstream, void *ts_from,
                                       uint32_t from_offset, void *ts_to,
                                       uint32_t to_offset, uint32_t count);
|
|
|
|
/**
|
|
* Clones tracepoints range into target u_trace.
|
|
* Provides callback for driver to copy timestamps on GPU from
|
|
* one buffer to another.
|
|
*
|
|
* It allows:
|
|
* - Tracing re-usable command buffer in Vulkan, by copying tracepoints
|
|
* each time it is submitted.
|
|
* - Per-tile tracing for tiling GPUs, by copying a range of tracepoints
|
|
* corresponding to a tile.
|
|
*/
|
|
void u_trace_clone_append(struct u_trace_iterator begin_it,
|
|
struct u_trace_iterator end_it,
|
|
struct u_trace *into,
|
|
void *cmdstream,
|
|
u_trace_copy_ts_buffer copy_ts_buffer);
|
|
|
|
/* NOTE(review): presumably marks the tracepoints between begin_it and
 * end_it as disabled -- confirm against the implementation (including
 * whether end_it is inclusive).
 */
void u_trace_disable_event_range(struct u_trace_iterator begin_it,
                                 struct u_trace_iterator end_it);
|
|
|
|
/**
 * Hand the traces over to the parent trace-context.  From here on, all of
 * the tracepoints are expected to be "executed" by the GPU after any
 * u_trace batch that was flushed earlier.
 *
 * flush_data lets the driver pass along extra data that only becomes
 * available at flush time; it reaches the u_trace_read_ts callback and
 * perfetto.  Typical examples: a fence for u_trace_read_ts to wait on, and
 * a submission_id to hand to perfetto.  The data is destroyed through
 * u_trace_delete_flush_data.
 *
 * The usual time to call this is when the corresponding cmdstream (the one
 * holding the timestamp reads) is flushed to the kernel.
 *
 * NOTE(review): free_data presumably decides whether u_trace destroys
 * flush_data itself -- confirm against the implementation.
 */
void u_trace_flush(struct u_trace *ut, void *flush_data, bool free_data);
|
|
|
|
/**
 * True when command buffers should be instrumented even though no traces
 * are being collected.
 */
extern bool ut_trace_instrument;

#ifndef HAVE_PERFETTO
/* No perfetto support compiled in: the flag is a constant zero. */
#  define ut_perfetto_enabled 0
#else
/* Treated as "perfetto tracing on" by the inline helpers in this header
 * when greater than zero.
 */
extern int ut_perfetto_enabled;

void u_trace_perfetto_start(void);
void u_trace_perfetto_stop(void);
#endif
|
|
|
|
static inline bool
|
|
u_trace_context_actively_tracing(struct u_trace_context *utctx)
|
|
{
|
|
return !!utctx->out || (ut_perfetto_enabled > 0);
|
|
}
|
|
|
|
static inline bool
|
|
u_trace_context_instrumenting(struct u_trace_context *utctx)
|
|
{
|
|
return !!utctx->out || ut_trace_instrument || (ut_perfetto_enabled > 0);
|
|
}
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* _U_TRACE_H */
|