From 4510350d5549a9d5029e01dce0e67b68a1ec1f2a Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Tue, 14 May 2024 18:05:16 +0200 Subject: [PATCH] util/u_trace: Pass explicit frame_nr argument to delimit frames Otherwise u_trace has to think that each submission is a frame, and that's not great if we want to gather statistics on per real frame basis. Signed-off-by: Danylo Piliaiev Reviewed-by: Lionel Landwerlin Part-of: --- src/freedreno/vulkan/tu_knl_drm_msm.cc | 2 +- src/freedreno/vulkan/tu_knl_drm_virtio.cc | 2 +- src/freedreno/vulkan/tu_knl_kgsl.cc | 2 +- .../drivers/freedreno/freedreno_gmem.c | 2 +- src/gallium/drivers/radeonsi/si_utrace.c | 2 +- src/intel/ds/intel_driver_ds.cc | 2 +- src/util/perf/u_trace.c | 22 +++++++++++++++++-- src/util/perf/u_trace.h | 6 ++++- 8 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/freedreno/vulkan/tu_knl_drm_msm.cc b/src/freedreno/vulkan/tu_knl_drm_msm.cc index 0f827723b3ae0..7121011cf02f6 100644 --- a/src/freedreno/vulkan/tu_knl_drm_msm.cc +++ b/src/freedreno/vulkan/tu_knl_drm_msm.cc @@ -1043,7 +1043,7 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_msm_queue_submit *submi bool free_data = i == submission_data->last_buffer_with_tracepoints; if (submission_data->cmd_trace_data[i].trace) u_trace_flush(submission_data->cmd_trace_data[i].trace, - submission_data, free_data); + submission_data, U_TRACE_FRAME_UNKNOWN, free_data); if (!submission_data->cmd_trace_data[i].timestamp_copy_cs) { /* u_trace is owned by cmd_buffer */ diff --git a/src/freedreno/vulkan/tu_knl_drm_virtio.cc b/src/freedreno/vulkan/tu_knl_drm_virtio.cc index 8f37b73eba446..8808288eb91f9 100644 --- a/src/freedreno/vulkan/tu_knl_drm_virtio.cc +++ b/src/freedreno/vulkan/tu_knl_drm_virtio.cc @@ -1135,7 +1135,7 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_virtio_queue_submit *su bool free_data = i == submission_data->last_buffer_with_tracepoints; if (submission_data->cmd_trace_data[i].trace) u_trace_flush(submission_data->cmd_trace_data[i].trace, - submission_data, free_data); + submission_data, U_TRACE_FRAME_UNKNOWN, free_data); if (!submission_data->cmd_trace_data[i].timestamp_copy_cs) { /* u_trace is owned by cmd_buffer */ diff --git a/src/freedreno/vulkan/tu_knl_kgsl.cc b/src/freedreno/vulkan/tu_knl_kgsl.cc index 5e214cb8de640..a7a5cb56d8055 100644 --- a/src/freedreno/vulkan/tu_knl_kgsl.cc +++ b/src/freedreno/vulkan/tu_knl_kgsl.cc @@ -1271,7 +1271,7 @@ kgsl_queue_submit(struct tu_queue *queue, struct vk_queue_submit *vk_submit) bool free_data = i == submission_data->last_buffer_with_tracepoints; if (submission_data->cmd_trace_data[i].trace) u_trace_flush(submission_data->cmd_trace_data[i].trace, - submission_data, free_data); + submission_data, U_TRACE_FRAME_UNKNOWN, free_data); if (!submission_data->cmd_trace_data[i].timestamp_copy_cs) { /* u_trace is owned by cmd_buffer */ diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c index e93477c494e4c..dd0b24c5e492b 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -817,7 +817,7 @@ fd_gmem_render_tiles(struct fd_batch *batch) flush_ring(batch); - u_trace_flush(&batch->trace, NULL, false); + u_trace_flush(&batch->trace, NULL, U_TRACE_FRAME_UNKNOWN, false); } /* Determine a worst-case estimate (ie. assuming we don't eliminate an diff --git a/src/gallium/drivers/radeonsi/si_utrace.c b/src/gallium/drivers/radeonsi/si_utrace.c index 95d7cfa215a10..847863c37a9fb 100644 --- a/src/gallium/drivers/radeonsi/si_utrace.c +++ b/src/gallium/drivers/radeonsi/si_utrace.c @@ -77,5 +77,5 @@ void si_utrace_flush(struct si_context *sctx, uint64_t submission_id) { struct si_ds_flush_data *flush_data = malloc(sizeof(*flush_data)); si_ds_flush_data_init(flush_data, &sctx->ds_queue, submission_id); - u_trace_flush(&sctx->trace, flush_data, false); + u_trace_flush(&sctx->trace, flush_data, U_TRACE_FRAME_UNKNOWN, false); } diff --git a/src/intel/ds/intel_driver_ds.cc b/src/intel/ds/intel_driver_ds.cc index 789221dde01a6..c9f0050545edd 100644 --- a/src/intel/ds/intel_driver_ds.cc +++ b/src/intel/ds/intel_driver_ds.cc @@ -666,7 +666,7 @@ void intel_ds_queue_flush_data(struct intel_ds_queue *queue, bool free_data) { simple_mtx_lock(&queue->device->trace_context_mutex); - u_trace_flush(ut, data, free_data); + u_trace_flush(ut, data, U_TRACE_FRAME_UNKNOWN, free_data); simple_mtx_unlock(&queue->device->trace_context_mutex); } diff --git a/src/util/perf/u_trace.c b/src/util/perf/u_trace.c index 6d9982cb59cff..d90db9ec805b0 100644 --- a/src/util/perf/u_trace.c +++ b/src/util/perf/u_trace.c @@ -101,7 +101,8 @@ struct u_trace_chunk { struct util_queue_fence fence; bool last; /* this chunk is last in batch */ - bool eof; /* this chunk is last in frame */ + bool eof; /* this chunk is last in frame, unless frame_nr is set */ + uint32_t frame_nr; /* frame idx from the driver */ void *flush_data; /* assigned by u_trace_flush */ @@ -503,6 +504,10 @@ u_trace_context_fini(struct u_trace_context *utctx) #endif if (utctx->out) { + if (utctx->batch_nr > 0) { + utctx->out_printer->end_of_frame(utctx); + } + utctx->out_printer->end(utctx); fflush(utctx->out); } @@ -555,6 +560,15 @@ process_chunk(void *job, void *gdata, int thread_index) struct u_trace_chunk *chunk = job; struct u_trace_context *utctx = chunk->utctx; + if (chunk->frame_nr != U_TRACE_FRAME_UNKNOWN && + chunk->frame_nr != utctx->frame_nr) { + if (utctx->out) { + utctx->out_printer->end_of_frame(utctx); + } + utctx->frame_nr = chunk->frame_nr; + utctx->start_of_frame = true; + } + if (utctx->start_of_frame) { utctx->start_of_frame = false; utctx->batch_nr = 0; @@ -852,12 +866,16 @@ u_trace_appendv(struct u_trace *ut, } void -u_trace_flush(struct u_trace *ut, void *flush_data, bool free_data) +u_trace_flush(struct u_trace *ut, + void *flush_data, + uint32_t frame_nr, + bool free_data) { list_for_each_entry (struct u_trace_chunk, chunk, &ut->trace_chunks, node) { chunk->flush_data = flush_data; chunk->free_flush_data = false; + chunk->frame_nr = frame_nr; } if (free_data && !list_is_empty(&ut->trace_chunks)) { diff --git a/src/util/perf/u_trace.h b/src/util/perf/u_trace.h index b61b7cfb800be..56479e95392cc 100644 --- a/src/util/perf/u_trace.h +++ b/src/util/perf/u_trace.h @@ -289,6 +289,7 @@ void u_trace_clone_append(struct u_trace_iterator begin_it, void u_trace_disable_event_range(struct u_trace_iterator begin_it, struct u_trace_iterator end_it); +#define U_TRACE_FRAME_UNKNOWN -1 /** * Flush traces to the parent trace-context. At this point, the expectation * is that all the tracepoints are "executed" by the GPU following any @@ -303,7 +304,10 @@ void u_trace_disable_event_range(struct u_trace_iterator begin_it, * This should typically be called when the corresponding cmdstream * (containing the timestamp reads) is flushed to the kernel. */ -void u_trace_flush(struct u_trace *ut, void *flush_data, bool free_data); +void u_trace_flush(struct u_trace *ut, + void *flush_data, + uint32_t frame_nr, + bool free_data); #ifdef HAVE_PERFETTO static ALWAYS_INLINE bool