turnip: implement basic perfetto support
Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Reviewed-by: Rob Clark <robdclark@chromium.org> Reviewed-by: Hyunjun Ko <zzoon@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10969>
This commit is contained in:
parent
cefaa73909
commit
3dd1bb6355
|
@ -50,6 +50,7 @@ libtu_files = files(
|
|||
'tu_shader.c',
|
||||
'tu_util.c',
|
||||
'tu_util.h',
|
||||
'tu_perfetto.h',
|
||||
'vk_format.h',
|
||||
)
|
||||
|
||||
|
@ -106,9 +107,28 @@ else
|
|||
tu_deps += dep_libdrm
|
||||
endif
|
||||
|
||||
tu_tracepoints = custom_target(
|
||||
'tu_tracepoints.[ch]',
|
||||
input: 'tu_tracepoints.py',
|
||||
output: ['tu_tracepoints.c', 'tu_tracepoints.h', 'tu_tracepoints_perfetto.h'],
|
||||
command: [
|
||||
prog_python, '@INPUT@',
|
||||
'-p', join_paths(meson.source_root(), 'src/util/perf/'),
|
||||
'--utrace-src', '@OUTPUT0@',
|
||||
'--utrace-hdr', '@OUTPUT1@',
|
||||
'--perfetto-hdr', '@OUTPUT2@',
|
||||
],
|
||||
depend_files: u_trace_py,
|
||||
)
|
||||
|
||||
if with_perfetto
|
||||
libtu_files += ['tu_perfetto.cc', 'tu_perfetto_util.c']
|
||||
tu_deps += dep_perfetto
|
||||
endif
|
||||
|
||||
libvulkan_freedreno = shared_library(
|
||||
'vulkan_freedreno',
|
||||
[libtu_files, tu_entrypoints, freedreno_xml_header_files],
|
||||
[libtu_files, tu_entrypoints, tu_tracepoints, freedreno_xml_header_files],
|
||||
include_directories : [
|
||||
inc_include,
|
||||
inc_src,
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
#include "util/half_float.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
|
||||
#include "tu_tracepoints.h"
|
||||
|
||||
static uint32_t
|
||||
tu_pack_float32_for_unorm(float val, int bits)
|
||||
{
|
||||
|
@ -1370,6 +1372,8 @@ tu6_blit_image(struct tu_cmd_buffer *cmd,
|
|||
unreachable("unexpected D32_S8 aspect mask in blit_image");
|
||||
}
|
||||
|
||||
trace_start_blit(&cmd->trace);
|
||||
|
||||
ops->setup(cmd, cs, format, info->dstSubresource.aspectMask,
|
||||
blit_param, false, dst_image->layout[0].ubwc,
|
||||
dst_image->layout[0].nr_samples);
|
||||
|
@ -1418,6 +1422,12 @@ tu6_blit_image(struct tu_cmd_buffer *cmd,
|
|||
}
|
||||
|
||||
ops->teardown(cmd, cs);
|
||||
|
||||
trace_end_blit(&cmd->trace,
|
||||
ops == &r3d_ops,
|
||||
src_image->vk_format,
|
||||
dst_image->vk_format,
|
||||
layers);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
|
@ -2032,6 +2042,8 @@ resolve_sysmem(struct tu_cmd_buffer *cmd,
|
|||
{
|
||||
const struct blit_ops *ops = &r2d_ops;
|
||||
|
||||
trace_start_resolve(&cmd->trace);
|
||||
|
||||
ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
0, false, dst->ubwc_enabled, VK_SAMPLE_COUNT_1_BIT);
|
||||
ops->coords(cs, &rect->offset, &rect->offset, &rect->extent);
|
||||
|
@ -2048,6 +2060,8 @@ resolve_sysmem(struct tu_cmd_buffer *cmd,
|
|||
}
|
||||
|
||||
ops->teardown(cmd, cs);
|
||||
|
||||
trace_end_resolve(&cmd->trace);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -35,6 +35,8 @@
|
|||
|
||||
#include "tu_cs.h"
|
||||
|
||||
#include "tu_tracepoints.h"
|
||||
|
||||
void
|
||||
tu6_emit_event_write(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
|
@ -987,9 +989,13 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
tu_cs_emit_regs(cs,
|
||||
A6XX_SP_TP_WINDOW_OFFSET(.x = 0, .y = 0));
|
||||
|
||||
trace_start_binning_ib(&cmd->trace);
|
||||
|
||||
/* emit IB to binning drawcmds: */
|
||||
tu_cs_emit_call(cs, &cmd->draw_cs);
|
||||
|
||||
trace_end_binning_ib(&cmd->trace);
|
||||
|
||||
/* switching from binning pass to GMEM pass will cause a switch from
|
||||
* PROGRAM_BINNING to PROGRAM, which invalidates const state (XS_CONST states)
|
||||
* so make sure these states are re-emitted
|
||||
|
@ -1336,13 +1342,18 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd)
|
|||
for (uint32_t ty = ty1; ty < ty2; ty++) {
|
||||
for (uint32_t tx = tx1; tx < tx2; tx++, slot++) {
|
||||
tu6_emit_tile_select(cmd, &cmd->cs, tx, ty, pipe, slot);
|
||||
|
||||
trace_start_draw_ib_gmem(&cmd->trace);
|
||||
tu6_render_tile(cmd, &cmd->cs);
|
||||
trace_end_draw_ib_gmem(&cmd->trace);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tu6_tile_render_end(cmd, &cmd->cs);
|
||||
|
||||
trace_end_render_pass(&cmd->trace, fb);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -1350,9 +1361,15 @@ tu_cmd_render_sysmem(struct tu_cmd_buffer *cmd)
|
|||
{
|
||||
tu6_sysmem_render_begin(cmd, &cmd->cs);
|
||||
|
||||
trace_start_draw_ib_sysmem(&cmd->trace);
|
||||
|
||||
tu_cs_emit_call(&cmd->cs, &cmd->draw_cs);
|
||||
|
||||
trace_end_draw_ib_sysmem(&cmd->trace);
|
||||
|
||||
tu6_sysmem_render_end(cmd, &cmd->cs);
|
||||
|
||||
trace_end_render_pass(&cmd->trace, cmd->state.framebuffer);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
|
@ -1384,6 +1401,8 @@ tu_create_cmd_buffer(struct tu_device *device,
|
|||
cmd_buffer->queue_family_index = TU_QUEUE_GENERAL;
|
||||
}
|
||||
|
||||
u_trace_init(&cmd_buffer->trace, &device->trace_context);
|
||||
|
||||
tu_cs_init(&cmd_buffer->cs, device, TU_CS_MODE_GROW, 4096);
|
||||
tu_cs_init(&cmd_buffer->draw_cs, device, TU_CS_MODE_GROW, 4096);
|
||||
tu_cs_init(&cmd_buffer->tile_store_cs, device, TU_CS_MODE_GROW, 2048);
|
||||
|
@ -1406,6 +1425,8 @@ tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer)
|
|||
tu_cs_finish(&cmd_buffer->draw_epilogue_cs);
|
||||
tu_cs_finish(&cmd_buffer->sub_cs);
|
||||
|
||||
u_trace_fini(&cmd_buffer->trace);
|
||||
|
||||
vk_object_free(&cmd_buffer->device->vk, &cmd_buffer->pool->alloc, cmd_buffer);
|
||||
}
|
||||
|
||||
|
@ -1425,6 +1446,9 @@ tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer)
|
|||
memset(&cmd_buffer->descriptors[i].push_set, 0, sizeof(cmd_buffer->descriptors[i].push_set));
|
||||
}
|
||||
|
||||
u_trace_fini(&cmd_buffer->trace);
|
||||
u_trace_init(&cmd_buffer->trace, &cmd_buffer->device->trace_context);
|
||||
|
||||
cmd_buffer->status = TU_CMD_BUFFER_STATUS_INITIAL;
|
||||
|
||||
return cmd_buffer->record_result;
|
||||
|
@ -3069,6 +3093,8 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
|
|||
cmd->state.framebuffer = fb;
|
||||
cmd->state.render_area = pRenderPassBegin->renderArea;
|
||||
|
||||
trace_start_render_pass(&cmd->trace);
|
||||
|
||||
/* Note: because this is external, any flushes will happen before draw_cs
|
||||
* gets called. However deferred flushes could have to happen later as part
|
||||
* of the subpass.
|
||||
|
@ -4468,6 +4494,8 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
|
|||
A6XX_HLSQ_CS_KERNEL_GROUP_Y(1),
|
||||
A6XX_HLSQ_CS_KERNEL_GROUP_Z(1));
|
||||
|
||||
trace_start_compute(&cmd->trace);
|
||||
|
||||
if (info->indirect) {
|
||||
uint64_t iova = tu_buffer_iova(info->indirect) + info->indirect_offset;
|
||||
|
||||
|
@ -4486,6 +4514,11 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_emit(cs, CP_EXEC_CS_3_NGROUPS_Z(info->blocks[2]));
|
||||
}
|
||||
|
||||
trace_end_compute(&cmd->trace,
|
||||
info->indirect != NULL,
|
||||
local_size[0], local_size[1], local_size[2],
|
||||
info->blocks[0], info->blocks[1], info->blocks[2]);
|
||||
|
||||
tu_cs_emit_wfi(cs);
|
||||
}
|
||||
|
||||
|
|
|
@ -347,6 +347,10 @@ tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
|
|||
|
||||
*pInstance = tu_instance_to_handle(instance);
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
tu_perfetto_init();
|
||||
#endif
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -1240,6 +1244,92 @@ tu_queue_finish(struct tu_queue *queue)
|
|||
tu_drm_submitqueue_close(queue->device, queue->msm_queue_id);
|
||||
}
|
||||
|
||||
/**
 * Convert a GPU timestamp expressed in timer ticks into nanoseconds.
 *
 * This is based on the 19.2MHz always-on rbbm timer.
 *
 * TODO we should probably query this value from kernel..
 *
 * One tick lasts 1000000000 / 19200000 = 625 / 12 ns (~52.083ns).  Doing
 * that division in integer arithmetic first would truncate the factor to
 * 52 and make every converted timestamp ~0.16% too small, so the exact
 * rational factor is applied instead.  The quotient and remainder of the
 * tick count are scaled separately so the intermediate product cannot
 * overflow before the final result itself would.
 *
 * \param dev  device the timestamp belongs to (currently unused)
 * \param ts   timestamp in timer ticks
 * \return     the timestamp in nanoseconds, rounded down
 */
uint64_t
tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts)
{
   const uint64_t ns_num = 625;
   const uint64_t ns_den = 12;

   return (ts / ns_den) * ns_num + (ts % ns_den) * ns_num / ns_den;
}
|
||||
|
||||
static void*
|
||||
tu_trace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size)
|
||||
{
|
||||
struct tu_device *device =
|
||||
container_of(utctx, struct tu_device, trace_context);
|
||||
|
||||
struct tu_bo *bo = ralloc(NULL, struct tu_bo);
|
||||
tu_bo_init_new(device, bo, size, false);
|
||||
|
||||
return bo;
|
||||
}
|
||||
|
||||
static void
|
||||
tu_trace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
|
||||
{
|
||||
struct tu_device *device =
|
||||
container_of(utctx, struct tu_device, trace_context);
|
||||
struct tu_bo *bo = timestamps;
|
||||
|
||||
tu_bo_finish(device, bo);
|
||||
ralloc_free(bo);
|
||||
}
|
||||
|
||||
static void
|
||||
tu_trace_record_ts(struct u_trace *ut, void *timestamps,
|
||||
unsigned idx)
|
||||
{
|
||||
struct tu_cmd_buffer *cmd = container_of(ut, struct tu_cmd_buffer, trace);
|
||||
struct tu_bo *bo = timestamps;
|
||||
struct tu_cs *cs = &cmd->cs;
|
||||
|
||||
unsigned ts_offset = idx * sizeof(uint64_t);
|
||||
tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 4);
|
||||
tu_cs_emit(cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
|
||||
tu_cs_emit_qw(cs, bo->iova + ts_offset);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
tu_trace_read_ts(struct u_trace_context *utctx,
|
||||
void *timestamps, unsigned idx, void *flush_data)
|
||||
{
|
||||
struct tu_device *device =
|
||||
container_of(utctx, struct tu_device, trace_context);
|
||||
struct tu_bo *bo = timestamps;
|
||||
struct tu_u_trace_flush_data *trace_flush_data = flush_data;
|
||||
|
||||
/* Only need to stall on results for the first entry: */
|
||||
if (idx == 0) {
|
||||
tu_device_wait_u_trace(device, trace_flush_data->syncobj);
|
||||
}
|
||||
|
||||
if (tu_bo_map(device, bo) != VK_SUCCESS) {
|
||||
return U_TRACE_NO_TIMESTAMP;
|
||||
}
|
||||
|
||||
uint64_t *ts = bo->map;
|
||||
|
||||
/* Don't translate the no-timestamp marker: */
|
||||
if (ts[idx] == U_TRACE_NO_TIMESTAMP)
|
||||
return U_TRACE_NO_TIMESTAMP;
|
||||
|
||||
return tu_device_ticks_to_ns(device, ts[idx]);
|
||||
}
|
||||
|
||||
static void
|
||||
tu_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
|
||||
{
|
||||
struct tu_device *device =
|
||||
container_of(utctx, struct tu_device, trace_context);
|
||||
struct tu_u_trace_flush_data *trace_flush_data = flush_data;
|
||||
|
||||
vk_free(&device->vk.alloc, trace_flush_data->syncobj);
|
||||
vk_free(&device->vk.alloc, trace_flush_data);
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
||||
const VkDeviceCreateInfo *pCreateInfo,
|
||||
|
@ -1480,6 +1570,14 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
|
||||
mtx_init(&device->mutex, mtx_plain);
|
||||
|
||||
device->submit_count = 0;
|
||||
u_trace_context_init(&device->trace_context, device,
|
||||
tu_trace_create_ts_buffer,
|
||||
tu_trace_destroy_ts_buffer,
|
||||
tu_trace_record_ts,
|
||||
tu_trace_read_ts,
|
||||
tu_trace_delete_flush_data);
|
||||
|
||||
*pDevice = tu_device_to_handle(device);
|
||||
return VK_SUCCESS;
|
||||
|
||||
|
@ -1521,6 +1619,8 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
|
|||
if (!device)
|
||||
return;
|
||||
|
||||
u_trace_context_fini(&device->trace_context);
|
||||
|
||||
for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
|
||||
for (unsigned q = 0; q < device->queue_count[i]; q++)
|
||||
tu_queue_finish(&device->queues[i][q]);
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include "drm-uapi/msm_drm.h"
|
||||
#include "util/timespec.h"
|
||||
#include "util/os_time.h"
|
||||
#include "util/perf/u_trace.h"
|
||||
|
||||
#include "tu_private.h"
|
||||
|
||||
|
@ -112,6 +113,12 @@ struct tu_queue_submit
|
|||
uint32_t counter_pass_index;
|
||||
};
|
||||
|
||||
/**
 * Identifies a kernel submit so that u_trace can wait for it: the values
 * are passed as queueid/fence of a DRM_MSM_WAIT_FENCE request.
 */
struct tu_u_trace_syncobj
{
   uint32_t msm_queue_id; /* submitqueue the work was submitted to */
   uint32_t fence;        /* fence value recorded for the submit */
};
|
||||
|
||||
static int
|
||||
tu_drm_get_param(const struct tu_physical_device *dev,
|
||||
uint32_t param,
|
||||
|
@ -165,6 +172,12 @@ tu_drm_get_gmem_base(const struct tu_physical_device *dev, uint64_t *base)
|
|||
return tu_drm_get_param(dev, MSM_PARAM_GMEM_BASE, base);
|
||||
}
|
||||
|
||||
int
|
||||
tu_drm_get_timestamp(struct tu_physical_device *device, uint64_t *ts)
|
||||
{
|
||||
return tu_drm_get_param(device, MSM_PARAM_TIMESTAMP, ts);
|
||||
}
|
||||
|
||||
int
|
||||
tu_drm_submitqueue_new(const struct tu_device *dev,
|
||||
int priority,
|
||||
|
@ -1052,6 +1065,12 @@ tu_queue_build_msm_gem_submit_cmds(struct tu_queue *queue,
|
|||
static VkResult
|
||||
tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
|
||||
{
|
||||
queue->device->submit_count++;
|
||||
|
||||
#if HAVE_PERFETTO
|
||||
tu_perfetto_submit(queue->device, queue->device->submit_count);
|
||||
#endif
|
||||
|
||||
uint32_t flags = MSM_PIPE_3D0;
|
||||
|
||||
if (submit->nr_in_syncobjs)
|
||||
|
@ -1118,6 +1137,35 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
|
|||
sem->timeline.highest_submitted = signal_value;
|
||||
}
|
||||
|
||||
if (u_trace_context_tracing(&queue->device->trace_context)) {
|
||||
bool has_chunks = false;
|
||||
for (uint32_t i = 0; i < submit->cmd_buffer_count; i++) {
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->cmd_buffers[i]);
|
||||
if (!list_is_empty(&cmdbuf->trace.trace_chunks)) {
|
||||
has_chunks = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (has_chunks) {
|
||||
struct tu_u_trace_flush_data *flush_data =
|
||||
vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_flush_data),
|
||||
8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
flush_data->submission_id = queue->device->submit_count;
|
||||
flush_data->syncobj =
|
||||
vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj),
|
||||
8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
flush_data->syncobj->fence = req.fence;
|
||||
flush_data->syncobj->msm_queue_id = queue->msm_queue_id;
|
||||
|
||||
for (uint32_t i = 0; i < submit->cmd_buffer_count; i++) {
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->cmd_buffers[i]);
|
||||
bool free_data = i == (submit->cmd_buffer_count - 1);
|
||||
u_trace_flush(&cmdbuf->trace, flush_data, free_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pthread_cond_broadcast(&queue->device->timeline_cond);
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
@ -1246,6 +1294,37 @@ tu_device_submit_deferred_locked(struct tu_device *dev)
|
|||
return result;
|
||||
}
|
||||
|
||||
static inline void
|
||||
get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
|
||||
{
|
||||
struct timespec t;
|
||||
clock_gettime(CLOCK_MONOTONIC, &t);
|
||||
tv->tv_sec = t.tv_sec + ns / 1000000000;
|
||||
tv->tv_nsec = t.tv_nsec + ns % 1000000000;
|
||||
}
|
||||
|
||||
VkResult
|
||||
tu_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj)
|
||||
{
|
||||
struct drm_msm_wait_fence req = {
|
||||
.fence = syncobj->fence,
|
||||
.queueid = syncobj->msm_queue_id,
|
||||
};
|
||||
int ret;
|
||||
|
||||
get_abs_timeout(&req.timeout, 1000000000);
|
||||
|
||||
ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req));
|
||||
if (ret && (ret != -ETIMEDOUT)) {
|
||||
fprintf(stderr, "wait-fence failed! %d (%s)", ret, strerror(errno));
|
||||
return VK_TIMEOUT;
|
||||
}
|
||||
|
||||
close(syncobj->fence);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
tu_QueueSubmit(VkQueue _queue,
|
||||
uint32_t submitCount,
|
||||
|
|
|
@ -0,0 +1,291 @@
|
|||
/*
|
||||
* Copyright © 2021 Google, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <perfetto.h>
|
||||
|
||||
#include "tu_perfetto.h"
|
||||
|
||||
#include "util/u_perfetto.h"
|
||||
#include "util/hash_table.h"
|
||||
|
||||
#include "tu_tracepoints.h"
|
||||
#include "tu_tracepoints_perfetto.h"
|
||||
|
||||
static uint32_t gpu_clock_id;
|
||||
static uint64_t next_clock_sync_ns; /* cpu time of next clk sync */
|
||||
|
||||
/**
|
||||
* The timestamp at the point where we first emitted the clock_sync..
|
||||
* this will be a *later* timestamp than the first GPU traces (since
|
||||
* we capture the first clock_sync from the CPU *after* the first GPU
|
||||
* tracepoints happen). To avoid confusing perfetto we need to drop
|
||||
* the GPU traces with timestamps before this.
|
||||
*/
|
||||
static uint64_t sync_gpu_ts;
|
||||
|
||||
/* Incremental state of the data source.  was_cleared starts out true and is
 * reset to false by stage_end() once the render-stage descriptors have been
 * sent.
 */
struct TuRenderpassIncrementalState {
   bool was_cleared = true;
};
|
||||
|
||||
/* Data source traits: the perfetto defaults, with the incremental state
 * type replaced by TuRenderpassIncrementalState.
 */
struct TuRenderpassTraits : public perfetto::DefaultDataSourceTraits {
   using IncrementalStateType = TuRenderpassIncrementalState;
};
|
||||
|
||||
/* Perfetto data source through which turnip emits its GPU render-stage
 * events.
 */
class TuRenderpassDataSource : public perfetto::DataSource<TuRenderpassDataSource, TuRenderpassTraits> {
public:
   void OnSetup(const SetupArgs &) override
   {
      // Hook for applying custom configuration to the data source based on
      // the TraceConfig in SetupArgs.  Nothing to configure at the moment.
   }

   void OnStart(const StartArgs &) override
   {
      // This notification can be used to initialize the GPU driver, enable
      // counters, etc.  StartArgs will contain the DataSourceDescriptor,
      // which can be extended.
      u_trace_perfetto_start();
      PERFETTO_LOG("Tracing started");

      /* Note: clock_id's below 128 are reserved.. for custom clock sources,
       * using the hash of a namespaced string is the recommended approach.
       * See: https://perfetto.dev/docs/concepts/clock-sync
       */
      const uint32_t name_hash =
         _mesa_hash_string("org.freedesktop.mesa.freedreno");
      gpu_clock_id = name_hash | 0x80000000;
   }

   void OnStop(const StopArgs &) override
   {
      PERFETTO_LOG("Tracing stopped");

      // Undo any initialization done in OnStart.
      u_trace_perfetto_stop();
      // TODO we should perhaps block until queued traces are flushed?

      // Emit one last (empty) packet and flush the trace context.
      Trace([](TuRenderpassDataSource::TraceContext ctx) {
         auto last_packet = ctx.NewTracePacket();
         last_packet->Finalize();
         ctx.Flush();
      });
   }
};
|
||||
|
||||
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource);
|
||||
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource);
|
||||
|
||||
static void
|
||||
send_descriptors(TuRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns)
|
||||
{
|
||||
PERFETTO_LOG("Sending renderstage descriptors");
|
||||
|
||||
auto packet = ctx.NewTracePacket();
|
||||
|
||||
packet->set_timestamp(0);
|
||||
|
||||
auto event = packet->set_gpu_render_stage_event();
|
||||
event->set_gpu_id(0);
|
||||
|
||||
auto spec = event->set_specifications();
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
|
||||
auto desc = spec->add_hw_queue();
|
||||
|
||||
desc->set_name(queues[i].name);
|
||||
desc->set_description(queues[i].desc);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
|
||||
auto desc = spec->add_stage();
|
||||
|
||||
desc->set_name(stages[i].name);
|
||||
if (stages[i].desc)
|
||||
desc->set_description(stages[i].desc);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
stage_start(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage)
|
||||
{
|
||||
struct tu_perfetto_state *p = tu_device_get_perfetto_state(dev);
|
||||
|
||||
p->start_ts[stage] = ts_ns;
|
||||
}
|
||||
|
||||
typedef void (*trace_payload_as_extra_func)(perfetto::protos::pbzero::GpuRenderStageEvent *, const void*);
|
||||
|
||||
static void
|
||||
stage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage,
|
||||
uint32_t submission_id, const void* payload = nullptr,
|
||||
trace_payload_as_extra_func payload_as_extra = nullptr)
|
||||
{
|
||||
struct tu_perfetto_state *p = tu_device_get_perfetto_state(dev);
|
||||
|
||||
/* If we haven't managed to calibrate the alignment between GPU and CPU
|
||||
* timestamps yet, then skip this trace, otherwise perfetto won't know
|
||||
* what to do with it.
|
||||
*/
|
||||
if (!sync_gpu_ts)
|
||||
return;
|
||||
|
||||
TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
|
||||
if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
|
||||
send_descriptors(tctx, p->start_ts[stage]);
|
||||
state->was_cleared = false;
|
||||
}
|
||||
|
||||
auto packet = tctx.NewTracePacket();
|
||||
|
||||
packet->set_timestamp(p->start_ts[stage]);
|
||||
packet->set_timestamp_clock_id(gpu_clock_id);
|
||||
|
||||
auto event = packet->set_gpu_render_stage_event();
|
||||
event->set_event_id(0); // ???
|
||||
event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
|
||||
event->set_duration(ts_ns - p->start_ts[stage]);
|
||||
event->set_stage_id(stage);
|
||||
event->set_context((uintptr_t)dev);
|
||||
event->set_submission_id(submission_id);
|
||||
|
||||
if (payload && payload_as_extra) {
|
||||
payload_as_extra(event, payload);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void
|
||||
tu_perfetto_init(void)
|
||||
{
|
||||
util_perfetto_init();
|
||||
|
||||
perfetto::DataSourceDescriptor dsd;
|
||||
dsd.set_name("gpu.renderstages.msm");
|
||||
TuRenderpassDataSource::Register(dsd);
|
||||
}
|
||||
|
||||
static void
|
||||
sync_timestamp(struct tu_device *dev)
|
||||
{
|
||||
uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
|
||||
uint64_t gpu_ts = 0;
|
||||
|
||||
if (cpu_ts < next_clock_sync_ns)
|
||||
return;
|
||||
|
||||
if (tu_device_get_timestamp(dev, &gpu_ts)) {
|
||||
PERFETTO_ELOG("Could not sync CPU and GPU clocks");
|
||||
return;
|
||||
}
|
||||
|
||||
/* convert GPU ts into ns: */
|
||||
gpu_ts = tu_device_ticks_to_ns(dev, gpu_ts);
|
||||
|
||||
TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
|
||||
auto packet = tctx.NewTracePacket();
|
||||
|
||||
packet->set_timestamp(cpu_ts);
|
||||
|
||||
auto event = packet->set_clock_snapshot();
|
||||
|
||||
{
|
||||
auto clock = event->add_clocks();
|
||||
|
||||
clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
|
||||
clock->set_timestamp(cpu_ts);
|
||||
}
|
||||
|
||||
{
|
||||
auto clock = event->add_clocks();
|
||||
|
||||
clock->set_clock_id(gpu_clock_id);
|
||||
clock->set_timestamp(gpu_ts);
|
||||
}
|
||||
|
||||
sync_gpu_ts = gpu_ts;
|
||||
next_clock_sync_ns = cpu_ts + 30000000;
|
||||
});
|
||||
}
|
||||
|
||||
static void
|
||||
emit_submit_id(uint32_t submission_id)
|
||||
{
|
||||
TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
|
||||
auto packet = tctx.NewTracePacket();
|
||||
|
||||
packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
|
||||
|
||||
auto event = packet->set_vulkan_api_event();
|
||||
auto submit = event->set_vk_queue_submit();
|
||||
|
||||
submit->set_submission_id(submission_id);
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
tu_perfetto_submit(struct tu_device *dev, uint32_t submission_id)
|
||||
{
|
||||
sync_timestamp(dev);
|
||||
emit_submit_id(submission_id);
|
||||
}
|
||||
|
||||
/*
|
||||
* Trace callbacks, called from u_trace once the timestamps from GPU have been
|
||||
* collected.
|
||||
*/
|
||||
|
||||
/* Defines the tu_start_<event_name>() / tu_end_<event_name>() pair for one
 * tracepoint.  The start callback only records the stage's start timestamp;
 * the end callback looks up the submission id from the flush data and emits
 * the stage event, passing the end payload through
 * trace_payload_as_extra_end_<event_name>().
 */
#define CREATE_EVENT_CALLBACK(event_name, stage)                              \
   void tu_start_##event_name(                                                \
      struct tu_device *dev, uint64_t ts_ns, const void *flush_data,          \
      const struct trace_start_##event_name *payload)                         \
   {                                                                          \
      stage_start(dev, ts_ns, stage);                                         \
   }                                                                          \
                                                                              \
   void tu_end_##event_name(                                                  \
      struct tu_device *dev, uint64_t ts_ns, const void *flush_data,          \
      const struct trace_end_##event_name *payload)                           \
   {                                                                          \
      const struct tu_u_trace_flush_data *submit_info =                       \
         static_cast<const struct tu_u_trace_flush_data *>(flush_data);       \
      stage_end(                                                              \
         dev, ts_ns, stage,                                                   \
         tu_u_trace_flush_data_get_submit_id(submit_info), payload,           \
         (trace_payload_as_extra_func)                                        \
            &trace_payload_as_extra_end_##event_name);                        \
   }
|
||||
|
||||
CREATE_EVENT_CALLBACK(render_pass, SURFACE_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(binning_ib, BINNING_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(draw_ib_gmem, GMEM_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(resolve, RESOLVE_STAGE_ID)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,110 @@
|
|||
/*
|
||||
* Copyright © 2021 Google, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef TU_PERFETTO_H_
|
||||
#define TU_PERFETTO_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
|
||||
/**
|
||||
* Render-stage id's
|
||||
*/
|
||||
enum tu_stage_id {
   SURFACE_STAGE_ID, /* Surface is a sort of meta-stage for render-target info */
   BINNING_STAGE_ID,
   GMEM_STAGE_ID,
   BYPASS_STAGE_ID,
   BLIT_STAGE_ID,
   COMPUTE_STAGE_ID,
   CLEAR_RESTORE_STAGE_ID,
   RESOLVE_STAGE_ID,
   // TODO add the rest

   NUM_STAGES /* number of stages above; sizes per-stage arrays */
};

/**
 * Name and (optional) description of every render stage, indexed by
 * enum tu_stage_id.  Entries without a description leave desc NULL.
 */
static const struct {
   const char *name;
   const char *desc;
} stages[] = {
   [SURFACE_STAGE_ID] = {"Surface"},
   [BINNING_STAGE_ID] = {"Binning", "Perform Visibility pass and determine target bins"},
   [GMEM_STAGE_ID]    = {"Render", "Rendering to GMEM"},
   [BYPASS_STAGE_ID]  = {"Render", "Rendering to system memory"},
   [BLIT_STAGE_ID]    = {"Blit", "Performing a Blit operation"},
   [COMPUTE_STAGE_ID] = {"Compute", "Compute job"},
   [CLEAR_RESTORE_STAGE_ID] = {"Clear/Restore", "Clear (sysmem) or per-tile clear or restore (GMEM)"},
   /* description had an unbalanced parenthesis; closed here */
   [RESOLVE_STAGE_ID] = {"Resolve", "Per tile resolve (GMEM to system memory)"},
   // TODO add the rest
};
|
||||
|
||||
/**
|
||||
* Queue-id's
|
||||
*/
|
||||
enum {
   DEFAULT_HW_QUEUE_ID, /* the only hardware queue described so far */
};

/**
 * Name and description of every hardware queue, indexed by queue id.
 */
static const struct {
   const char *name;
   const char *desc;
} queues[] = {
   [DEFAULT_HW_QUEUE_ID] = {
      "GPU Queue 0",
      "Default Adreno Hardware Queue",
   },
};
|
||||
|
||||
struct tu_perfetto_state {
|
||||
uint64_t start_ts[NUM_STAGES];
|
||||
};
|
||||
|
||||
void tu_perfetto_init(void);
|
||||
|
||||
struct tu_device;
|
||||
void tu_perfetto_submit(struct tu_device *dev, uint32_t submission_id);
|
||||
|
||||
/* Helpers */
|
||||
|
||||
struct tu_perfetto_state *
|
||||
tu_device_get_perfetto_state(struct tu_device *dev);
|
||||
|
||||
int
|
||||
tu_device_get_timestamp(struct tu_device *dev,
|
||||
uint64_t *ts);
|
||||
|
||||
uint64_t
|
||||
tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts);
|
||||
|
||||
struct tu_u_trace_flush_data;
|
||||
uint32_t
|
||||
tu_u_trace_flush_data_get_submit_id(const struct tu_u_trace_flush_data *data);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* TU_PERFETTO_H_ */
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Copyright © 2021 Igalia S.L.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "tu_private.h"
|
||||
#include "tu_perfetto.h"
|
||||
|
||||
/* Including tu_private.h in tu_perfetto.cc doesn't work, so
|
||||
* we need some helper methods to access tu_device.
|
||||
*/
|
||||
|
||||
struct tu_perfetto_state *
|
||||
tu_device_get_perfetto_state(struct tu_device *dev)
|
||||
{
|
||||
return &dev->perfetto;
|
||||
}
|
||||
|
||||
int
|
||||
tu_device_get_timestamp(struct tu_device *dev,
|
||||
uint64_t *ts)
|
||||
{
|
||||
return tu_drm_get_timestamp(dev->physical_device, ts);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
tu_u_trace_flush_data_get_submit_id(const struct tu_u_trace_flush_data *data)
|
||||
{
|
||||
return data->submission_id;
|
||||
}
|
|
@ -53,6 +53,7 @@
|
|||
#include "util/macros.h"
|
||||
#include "util/u_atomic.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "util/perf/u_trace.h"
|
||||
#include "vk_alloc.h"
|
||||
#include "vk_debug_report.h"
|
||||
#include "vk_device.h"
|
||||
|
@ -75,6 +76,7 @@
|
|||
|
||||
#include "tu_descriptor_set.h"
|
||||
#include "tu_util.h"
|
||||
#include "tu_perfetto.h"
|
||||
|
||||
/* Pre-declarations needed for WSI entrypoints */
|
||||
struct wl_surface;
|
||||
|
@ -291,6 +293,7 @@ struct tu_pipeline_key
|
|||
#define TU_MAX_QUEUE_FAMILIES 1
|
||||
|
||||
struct tu_syncobj;
|
||||
struct tu_u_trace_syncobj;
|
||||
|
||||
struct tu_queue
|
||||
{
|
||||
|
@ -425,6 +428,14 @@ struct tu_device
|
|||
TU_GRALLOC_OTHER,
|
||||
} gralloc_type;
|
||||
#endif
|
||||
|
||||
uint32_t submit_count;
|
||||
|
||||
struct u_trace_context trace_context;
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
struct tu_perfetto_state perfetto;
|
||||
#endif
|
||||
};
|
||||
|
||||
void tu_init_clear_blit_shaders(struct tu_device *dev);
|
||||
|
@ -445,6 +456,12 @@ tu_device_is_lost(struct tu_device *device)
|
|||
VkResult
|
||||
tu_device_submit_deferred_locked(struct tu_device *dev);
|
||||
|
||||
VkResult
|
||||
tu_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj);
|
||||
|
||||
uint64_t
|
||||
tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts);
|
||||
|
||||
enum tu_bo_alloc_flags
|
||||
{
|
||||
TU_BO_ALLOC_NO_FLAGS = 0,
|
||||
|
@ -1042,6 +1059,8 @@ struct tu_cmd_buffer
|
|||
struct tu_cmd_pool *pool;
|
||||
struct list_head pool_link;
|
||||
|
||||
struct u_trace trace;
|
||||
|
||||
VkCommandBufferUsageFlags usage_flags;
|
||||
VkCommandBufferLevel level;
|
||||
enum tu_cmd_buffer_status status;
|
||||
|
@ -1691,6 +1710,10 @@ tu_physical_device_init(struct tu_physical_device *device,
|
|||
VkResult
|
||||
tu_enumerate_devices(struct tu_instance *instance);
|
||||
|
||||
int
|
||||
tu_drm_get_timestamp(struct tu_physical_device *device,
|
||||
uint64_t *ts);
|
||||
|
||||
int
|
||||
tu_drm_submitqueue_new(const struct tu_device *dev,
|
||||
int priority,
|
||||
|
@ -1705,6 +1728,12 @@ tu_signal_fences(struct tu_device *device, struct tu_syncobj *fence1, struct tu_
|
|||
int
|
||||
tu_syncobj_to_fd(struct tu_device *device, struct tu_syncobj *sync);
|
||||
|
||||
/* Data associated with one u_trace flush. */
struct tu_u_trace_flush_data
{
   /* Read back through tu_u_trace_flush_data_get_submit_id(). */
   uint32_t submission_id;
   /* NOTE(review): presumably the object handed to
    * tu_device_wait_u_trace() for this flush -- confirm against the
    * submit path.
    */
   struct tu_u_trace_syncobj *syncobj;
};
|
||||
|
||||
#define TU_DEFINE_HANDLE_CASTS(__tu_type, __VkType) \
|
||||
\
|
||||
static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \
|
||||
|
|
|
@ -0,0 +1,115 @@
|
|||
#
|
||||
# Copyright © 2021 Igalia S.L.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice (including the next
|
||||
# paragraph) shall be included in all copies or substantial portions of the
|
||||
# Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
#
|
||||
|
||||
import argparse
import sys

#
# TODO can we do this with less boilerplate?
#
# Command-line interface: every option is mandatory.
#   -p/--import-path  directory prepended to sys.path so u_trace can be imported
#   --utrace-src      output path passed to utrace_generate() as cpath
#   --utrace-hdr      output path passed to utrace_generate() as hpath
#   --perfetto-hdr    output path passed to utrace_generate_perfetto_utils()
#
parser = argparse.ArgumentParser()
parser.add_argument('-p', '--import-path', required=True)
parser.add_argument('--utrace-src', required=True)
parser.add_argument('--utrace-hdr', required=True)
parser.add_argument('--perfetto-hdr', required=True)
args = parser.parse_args()
sys.path.insert(0, args.import_path)


# These imports must stay below the sys.path.insert() call above: u_trace is
# looked up through the directory given by --import-path.
from u_trace import Header, HeaderScope
from u_trace import ForwardDecl
from u_trace import Tracepoint
from u_trace import TracepointArg as Arg
from u_trace import TracepointArgStruct as ArgStruct
from u_trace import utrace_generate
from u_trace import utrace_generate_perfetto_utils

#
# Tracepoint definitions:
#

Header('util/u_dump.h')
Header('vk_format.h')
Header('freedreno/vulkan/tu_private.h', scope=HeaderScope.SOURCE)

ForwardDecl('struct tu_device')

Tracepoint('start_render_pass',
    tp_perfetto='tu_start_render_pass'
)
Tracepoint('end_render_pass',
    args=[ArgStruct(type='const struct tu_framebuffer *', var='fb')],
    tp_struct=[Arg(type='uint16_t', name='width', var='fb->width', c_format='%u'),
               Arg(type='uint16_t', name='height', var='fb->height', c_format='%u'),
               Arg(type='uint8_t', name='MRTs', var='fb->attachment_count', c_format='%u'),
             # Arg(type='uint8_t', name='samples', var='fb->samples', c_format='%u'),
               Arg(type='uint16_t', name='numberOfBins', var='fb->tile_count.width * fb->tile_count.height', c_format='%u'),
               Arg(type='uint16_t', name='binWidth', var='fb->tile0.width', c_format='%u'),
               Arg(type='uint16_t', name='binHeight', var='fb->tile0.height', c_format='%u')],
    tp_perfetto='tu_end_render_pass')

Tracepoint('start_binning_ib',
    tp_perfetto='tu_start_binning_ib')
Tracepoint('end_binning_ib',
    tp_perfetto='tu_end_binning_ib')

Tracepoint('start_resolve',
    tp_perfetto='tu_start_resolve')
Tracepoint('end_resolve',
    tp_perfetto='tu_end_resolve')

Tracepoint('start_draw_ib_sysmem',
    tp_perfetto='tu_start_draw_ib_sysmem')
Tracepoint('end_draw_ib_sysmem',
    tp_perfetto='tu_end_draw_ib_sysmem')

Tracepoint('start_draw_ib_gmem',
    tp_perfetto='tu_start_draw_ib_gmem')
Tracepoint('end_draw_ib_gmem',
    tp_perfetto='tu_end_draw_ib_gmem')

Tracepoint('start_blit',
    tp_perfetto='tu_start_blit',
)
Tracepoint('end_blit',
    # TODO: add source megapixels count and target megapixels count arguments
    args=[Arg(type='uint8_t', var='uses_3d_blit', c_format='%u'),
          Arg(type='enum VkFormat', var='src_format', c_format='%s', to_prim_type='vk_format_description({})->short_name'),
          Arg(type='enum VkFormat', var='dst_format', c_format='%s', to_prim_type='vk_format_description({})->short_name'),
          Arg(type='uint8_t', var='layers', c_format='%u')],
    tp_perfetto='tu_end_blit')

Tracepoint('start_compute',
    tp_perfetto='tu_start_compute')
Tracepoint('end_compute',
    args=[Arg(type='uint8_t', var='indirect', c_format='%u'),
          Arg(type='uint16_t', var='local_size_x', c_format='%u'),
          Arg(type='uint16_t', var='local_size_y', c_format='%u'),
          Arg(type='uint16_t', var='local_size_z', c_format='%u'),
          Arg(type='uint16_t', var='num_groups_x', c_format='%u'),
          Arg(type='uint16_t', var='num_groups_y', c_format='%u'),
          Arg(type='uint16_t', var='num_groups_z', c_format='%u')],
    tp_perfetto='tu_end_compute')

# Emit the generated files at the paths supplied on the command line.
utrace_generate(cpath=args.utrace_src, hpath=args.utrace_hdr, ctx_param='struct tu_device *dev')
utrace_generate_perfetto_utils(hpath=args.perfetto_hdr)
|
|
@ -247,6 +247,9 @@ VKAPI_ATTR VkResult VKAPI_CALL
|
|||
tu_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_queue, queue, _queue);
|
||||
|
||||
u_trace_context_process(&queue->device->trace_context, true);
|
||||
|
||||
return wsi_common_queue_present(
|
||||
&queue->device->physical_device->wsi_device,
|
||||
tu_device_to_handle(queue->device), _queue, queue->queue_family_index,
|
||||
|
|
|
@ -28,6 +28,10 @@
|
|||
#include <vulkan/vulkan_core.h>
|
||||
#include "util/format/u_format.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum pipe_format
|
||||
vk_format_to_pipe_format(enum VkFormat vkformat);
|
||||
|
||||
|
@ -84,4 +88,8 @@ vk_format_stencil_only(VkFormat format)
|
|||
return VK_FORMAT_S8_UINT;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue