turnip/perfetto: reusable command buffers support
The limitation is that the reusable command buffer should be created when perfetto is already connected in order to write timestamps. Otherwise such cmd buffer won't be traced. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Reviewed-by: Rob Clark <robdclark@chromium.org> Reviewed-by: Hyunjun Ko <zzoon@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10969>
This commit is contained in:
parent
0565c993f9
commit
5c6f0d46e7
|
@ -1326,10 +1326,86 @@ tu_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
|
|||
container_of(utctx, struct tu_device, trace_context);
|
||||
struct tu_u_trace_flush_data *trace_flush_data = flush_data;
|
||||
|
||||
tu_u_trace_cmd_data_finish(device, trace_flush_data->cmd_trace_data,
|
||||
trace_flush_data->trace_count);
|
||||
vk_free(&device->vk.alloc, trace_flush_data->syncobj);
|
||||
vk_free(&device->vk.alloc, trace_flush_data);
|
||||
}
|
||||
|
||||
void
|
||||
tu_copy_timestamp_buffer(struct u_trace_context *utctx, void *cmdstream,
|
||||
void *ts_from, uint32_t from_offset,
|
||||
void *ts_to, uint32_t to_offset,
|
||||
uint32_t count)
|
||||
{
|
||||
struct tu_cs *cs = cmdstream;
|
||||
struct tu_bo *bo_from = ts_from;
|
||||
struct tu_bo *bo_to = ts_to;
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_MEMCPY, 5);
|
||||
tu_cs_emit(cs, count * sizeof(uint64_t) / sizeof(uint32_t));
|
||||
tu_cs_emit_qw(cs, bo_from->iova + from_offset * sizeof(uint64_t));
|
||||
tu_cs_emit_qw(cs, bo_to->iova + to_offset * sizeof(uint64_t));
|
||||
}
|
||||
|
||||
VkResult
|
||||
tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs** cs,
|
||||
struct u_trace **trace_copy)
|
||||
{
|
||||
*cs = vk_zalloc(&cmdbuf->device->vk.alloc, sizeof(struct tu_cs), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
|
||||
if (*cs == NULL) {
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
tu_cs_init(*cs, cmdbuf->device, TU_CS_MODE_GROW,
|
||||
list_length(&cmdbuf->trace.trace_chunks) * 6 + 3);
|
||||
|
||||
tu_cs_begin(*cs);
|
||||
|
||||
tu_cs_emit_wfi(*cs);
|
||||
tu_cs_emit_pkt7(*cs, CP_WAIT_FOR_ME, 0);
|
||||
|
||||
*trace_copy = vk_zalloc(&cmdbuf->device->vk.alloc, sizeof(struct u_trace), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
|
||||
if (*trace_copy == NULL) {
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
u_trace_init(*trace_copy, cmdbuf->trace.utctx);
|
||||
u_trace_clone_append(u_trace_begin_iterator(&cmdbuf->trace),
|
||||
u_trace_end_iterator(&cmdbuf->trace),
|
||||
*trace_copy, *cs,
|
||||
tu_copy_timestamp_buffer);
|
||||
|
||||
tu_cs_emit_wfi(*cs);
|
||||
|
||||
tu_cs_end(*cs);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void
|
||||
tu_u_trace_cmd_data_finish(struct tu_device *device,
|
||||
struct tu_u_trace_cmd_data *trace_data,
|
||||
uint32_t entry_count)
|
||||
{
|
||||
for (uint32_t i = 0; i < entry_count; ++i) {
|
||||
/* Only if we had to create a copy of trace we should free it */
|
||||
if (trace_data[i].timestamp_copy_cs != NULL) {
|
||||
tu_cs_finish(trace_data[i].timestamp_copy_cs);
|
||||
vk_free(&device->vk.alloc, trace_data[i].timestamp_copy_cs);
|
||||
|
||||
u_trace_fini(trace_data[i].trace);
|
||||
vk_free(&device->vk.alloc, trace_data[i].trace);
|
||||
}
|
||||
}
|
||||
|
||||
vk_free(&device->vk.alloc, trace_data);
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
||||
const VkDeviceCreateInfo *pCreateInfo,
|
||||
|
|
|
@ -38,6 +38,8 @@
|
|||
|
||||
#include "tu_private.h"
|
||||
|
||||
#include "tu_cs.h"
|
||||
|
||||
struct tu_binary_syncobj {
|
||||
uint32_t permanent, temporary;
|
||||
};
|
||||
|
@ -85,6 +87,7 @@ struct tu_queue_submit
|
|||
struct list_head link;
|
||||
|
||||
VkCommandBuffer *cmd_buffers;
|
||||
struct tu_u_trace_cmd_data *cmd_buffer_trace_data;
|
||||
uint32_t cmd_buffer_count;
|
||||
|
||||
struct tu_syncobj **wait_semaphores;
|
||||
|
@ -938,6 +941,9 @@ tu_queue_submit_create_locked(struct tu_queue *queue,
|
|||
}
|
||||
}
|
||||
|
||||
bool u_trace_enabled = u_trace_context_tracing(&queue->device->trace_context);
|
||||
bool has_trace_points = false;
|
||||
|
||||
uint32_t entry_count = 0;
|
||||
for (uint32_t j = 0; j < new_submit->cmd_buffer_count; ++j) {
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, new_submit->cmd_buffers[j]);
|
||||
|
@ -946,6 +952,13 @@ tu_queue_submit_create_locked(struct tu_queue *queue,
|
|||
entry_count++;
|
||||
|
||||
entry_count += cmdbuf->cs.entry_count;
|
||||
|
||||
if (u_trace_enabled && u_trace_has_points(&cmdbuf->trace)) {
|
||||
if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
|
||||
entry_count++;
|
||||
|
||||
has_trace_points = true;
|
||||
}
|
||||
}
|
||||
|
||||
new_submit->cmds = vk_zalloc(&queue->device->vk.alloc,
|
||||
|
@ -957,6 +970,39 @@ tu_queue_submit_create_locked(struct tu_queue *queue,
|
|||
goto fail_cmds;
|
||||
}
|
||||
|
||||
if (has_trace_points) {
|
||||
new_submit->cmd_buffer_trace_data = vk_zalloc(&queue->device->vk.alloc,
|
||||
new_submit->cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
|
||||
if (new_submit->cmd_buffer_trace_data == NULL) {
|
||||
result = vk_error(queue->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY)
|
||||
goto fail_cmd_trace_data;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < new_submit->cmd_buffer_count; ++i) {
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, new_submit->cmd_buffers[i]);
|
||||
|
||||
if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) &&
|
||||
u_trace_has_points(&cmdbuf->trace)) {
|
||||
/* A single command buffer could be submitted several times, but we
|
||||
* already baked timestamp iova addresses and trace points are
|
||||
* single-use. Therefore we have to copy trace points and create
|
||||
* a new timestamp buffer on every submit of reusable command buffer.
|
||||
*/
|
||||
if (tu_create_copy_timestamp_cs(cmdbuf,
|
||||
&new_submit->cmd_buffer_trace_data[i].timestamp_copy_cs,
|
||||
&new_submit->cmd_buffer_trace_data[i].trace) != VK_SUCCESS) {
|
||||
result = vk_error(queue->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY)
|
||||
goto fail_copy_timestamp_cs;
|
||||
}
|
||||
assert(new_submit->cmd_buffer_trace_data[i].timestamp_copy_cs->entry_count == 1);
|
||||
} else {
|
||||
new_submit->cmd_buffer_trace_data[i].trace = &cmdbuf->trace;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate without wait timeline semaphores */
|
||||
new_submit->in_syncobjs = vk_zalloc(&queue->device->vk.alloc,
|
||||
(nr_in_syncobjs - new_submit->wait_timeline_count) *
|
||||
|
@ -992,6 +1038,12 @@ tu_queue_submit_create_locked(struct tu_queue *queue,
|
|||
fail_out_syncobjs:
|
||||
vk_free(&queue->device->vk.alloc, new_submit->in_syncobjs);
|
||||
fail_in_syncobjs:
|
||||
if (new_submit->cmd_buffer_trace_data)
|
||||
tu_u_trace_cmd_data_finish(queue->device, new_submit->cmd_buffer_trace_data,
|
||||
new_submit->cmd_buffer_count);
|
||||
fail_copy_timestamp_cs:
|
||||
vk_free(&queue->device->vk.alloc, new_submit->cmd_buffer_trace_data);
|
||||
fail_cmd_trace_data:
|
||||
vk_free(&queue->device->vk.alloc, new_submit->cmds);
|
||||
fail_cmds:
|
||||
fail_signal_timelines:
|
||||
|
@ -1059,6 +1111,23 @@ tu_queue_build_msm_gem_submit_cmds(struct tu_queue *queue,
|
|||
cmds[entry_idx].nr_relocs = 0;
|
||||
cmds[entry_idx].relocs = 0;
|
||||
}
|
||||
|
||||
if (submit->cmd_buffer_trace_data) {
|
||||
struct tu_cs *ts_cs = submit->cmd_buffer_trace_data[j].timestamp_copy_cs;
|
||||
if (ts_cs) {
|
||||
cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
|
||||
cmds[entry_idx].submit_idx =
|
||||
queue->device->bo_idx[ts_cs->entries[0].bo->gem_handle];
|
||||
|
||||
assert(cmds[entry_idx].submit_idx < queue->device->bo_count);
|
||||
|
||||
cmds[entry_idx].submit_offset = ts_cs->entries[0].offset;
|
||||
cmds[entry_idx].size = ts_cs->entries[0].size;
|
||||
cmds[entry_idx].pad = 0;
|
||||
cmds[entry_idx].nr_relocs = 0;
|
||||
cmds[entry_idx++].relocs = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1137,32 +1206,24 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
|
|||
sem->timeline.highest_submitted = signal_value;
|
||||
}
|
||||
|
||||
if (u_trace_context_tracing(&queue->device->trace_context)) {
|
||||
bool has_chunks = false;
|
||||
if (submit->cmd_buffer_trace_data) {
|
||||
struct tu_u_trace_flush_data *flush_data =
|
||||
vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_flush_data),
|
||||
8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
flush_data->submission_id = queue->device->submit_count;
|
||||
flush_data->syncobj =
|
||||
vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj),
|
||||
8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
flush_data->syncobj->fence = req.fence;
|
||||
flush_data->syncobj->msm_queue_id = queue->msm_queue_id;
|
||||
|
||||
flush_data->cmd_trace_data = submit->cmd_buffer_trace_data;
|
||||
flush_data->trace_count = submit->cmd_buffer_count;
|
||||
submit->cmd_buffer_trace_data = NULL;
|
||||
|
||||
for (uint32_t i = 0; i < submit->cmd_buffer_count; i++) {
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->cmd_buffers[i]);
|
||||
if (!list_is_empty(&cmdbuf->trace.trace_chunks)) {
|
||||
has_chunks = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (has_chunks) {
|
||||
struct tu_u_trace_flush_data *flush_data =
|
||||
vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_flush_data),
|
||||
8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
flush_data->submission_id = queue->device->submit_count;
|
||||
flush_data->syncobj =
|
||||
vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj),
|
||||
8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
flush_data->syncobj->fence = req.fence;
|
||||
flush_data->syncobj->msm_queue_id = queue->msm_queue_id;
|
||||
|
||||
for (uint32_t i = 0; i < submit->cmd_buffer_count; i++) {
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->cmd_buffers[i]);
|
||||
bool free_data = i == (submit->cmd_buffer_count - 1);
|
||||
u_trace_flush(&cmdbuf->trace, flush_data, free_data);
|
||||
}
|
||||
bool free_data = i == (submit->cmd_buffer_count - 1);
|
||||
u_trace_flush(flush_data->cmd_trace_data[i].trace, flush_data, free_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1320,8 +1381,6 @@ tu_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj
|
|||
return VK_TIMEOUT;
|
||||
}
|
||||
|
||||
close(syncobj->fence);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
|
@ -1728,10 +1728,35 @@ tu_signal_fences(struct tu_device *device, struct tu_syncobj *fence1, struct tu_
|
|||
int
|
||||
tu_syncobj_to_fd(struct tu_device *device, struct tu_syncobj *sync);
|
||||
|
||||
|
||||
void
|
||||
tu_copy_timestamp_buffer(struct u_trace_context *utctx, void *cmdstream,
|
||||
void *ts_from, uint32_t from_offset,
|
||||
void *ts_to, uint32_t to_offset,
|
||||
uint32_t count);
|
||||
|
||||
|
||||
VkResult
|
||||
tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs** cs,
|
||||
struct u_trace **trace_copy);
|
||||
|
||||
struct tu_u_trace_cmd_data
|
||||
{
|
||||
struct tu_cs *timestamp_copy_cs;
|
||||
struct u_trace *trace;
|
||||
};
|
||||
|
||||
void
|
||||
tu_u_trace_cmd_data_finish(struct tu_device *device,
|
||||
struct tu_u_trace_cmd_data *trace_data,
|
||||
uint32_t entry_count);
|
||||
|
||||
struct tu_u_trace_flush_data
|
||||
{
|
||||
uint32_t submission_id;
|
||||
struct tu_u_trace_syncobj *syncobj;
|
||||
uint32_t trace_count;
|
||||
struct tu_u_trace_cmd_data *cmd_trace_data;
|
||||
};
|
||||
|
||||
#define TU_DEFINE_HANDLE_CASTS(__tu_type, __VkType) \
|
||||
|
|
Loading…
Reference in New Issue