/*
|
|
* Copyright © 2021 Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "anv_private.h"
|
|
|
|
#include "perf/intel_perf.h"
|
|
|
|
static uint32_t
|
|
command_buffers_count_utraces(struct anv_device *device,
|
|
uint32_t cmd_buffer_count,
|
|
struct anv_cmd_buffer **cmd_buffers,
|
|
uint32_t *utrace_copies)
|
|
{
|
|
if (!u_trace_context_actively_tracing(&device->ds.trace_context))
|
|
return 0;
|
|
|
|
uint32_t utraces = 0;
|
|
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
|
|
if (u_trace_has_points(&cmd_buffers[i]->trace)) {
|
|
utraces++;
|
|
if (!(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
|
|
*utrace_copies += list_length(&cmd_buffers[i]->trace.trace_chunks);
|
|
}
|
|
}
|
|
|
|
return utraces;
|
|
}
|
|
|
|
static void
|
|
anv_utrace_delete_flush_data(struct u_trace_context *utctx,
|
|
void *flush_data)
|
|
{
|
|
struct anv_device *device =
|
|
container_of(utctx, struct anv_device, ds.trace_context);
|
|
struct anv_utrace_flush_copy *flush = flush_data;
|
|
|
|
intel_ds_flush_data_fini(&flush->ds);
|
|
|
|
if (flush->trace_bo) {
|
|
assert(flush->batch_bo);
|
|
anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
|
|
anv_device_release_bo(device, flush->batch_bo);
|
|
anv_device_release_bo(device, flush->trace_bo);
|
|
}
|
|
|
|
vk_sync_destroy(&device->vk, flush->sync);
|
|
|
|
vk_free(&device->vk.alloc, flush);
|
|
}
|
|
|
|
static void
|
|
anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context *utctx,
|
|
void *cmdstream,
|
|
void *ts_from, uint32_t from_offset,
|
|
void *ts_to, uint32_t to_offset,
|
|
uint32_t count)
|
|
{
|
|
struct anv_device *device =
|
|
container_of(utctx, struct anv_device, ds.trace_context);
|
|
struct anv_utrace_flush_copy *flush = cmdstream;
|
|
struct anv_address from_addr = (struct anv_address) {
|
|
.bo = ts_from, .offset = from_offset * sizeof(uint64_t) };
|
|
struct anv_address to_addr = (struct anv_address) {
|
|
.bo = ts_to, .offset = to_offset * sizeof(uint64_t) };
|
|
|
|
anv_genX(&device->info, emit_so_memcpy)(&flush->memcpy_state,
|
|
to_addr, from_addr, count * sizeof(uint64_t));
|
|
}
|
|
|
|
/**
 * Gather utrace data for a batch of command buffers at submission time.
 *
 * If no command buffer carries trace points (or tracing is inactive),
 * *out_flush_data is set to NULL and VK_SUCCESS is returned. Otherwise a
 * flush-copy object is allocated and returned through *out_flush_data; it
 * is later destroyed by anv_utrace_delete_flush_data() once the trace
 * context has consumed the data.
 *
 * Two cases:
 *  - All traced command buffers are ONE_TIME_SUBMIT: their timestamp
 *    buffers cannot be overwritten by a resubmit, so the traces are
 *    flushed directly (no copies, utrace_copies == 0).
 *  - Some command buffers are resubmittable: their timestamps must be
 *    snapshotted before the app reuses them, so we build a small batch
 *    that copies every trace chunk (one 4096-byte page per chunk) into a
 *    freshly allocated trace_bo.
 *
 * Returns VK_SUCCESS or an allocation/sync error; on error everything
 * acquired so far is unwound through the goto chain below.
 */
VkResult
anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
                                    uint32_t cmd_buffer_count,
                                    struct anv_cmd_buffer **cmd_buffers,
                                    struct anv_utrace_flush_copy **out_flush_data)
{
   struct anv_device *device = queue->device;
   uint32_t utrace_copies = 0;
   uint32_t utraces = command_buffers_count_utraces(device,
                                                    cmd_buffer_count,
                                                    cmd_buffers,
                                                    &utrace_copies);
   if (!utraces) {
      *out_flush_data = NULL;
      return VK_SUCCESS;
   }

   VkResult result;
   struct anv_utrace_flush_copy *flush =
      vk_zalloc(&device->vk.alloc, sizeof(struct anv_utrace_flush_copy),
                8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!flush)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   intel_ds_flush_data_init(&flush->ds, queue->ds, queue->ds->submission_id);

   /* Sync that anv_utrace_read_ts() waits on before reading results. */
   result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
                           0, 0, &flush->sync);
   if (result != VK_SUCCESS)
      goto error_sync;

   if (utrace_copies > 0) {
      /* Destination for the copied timestamps: one page per trace chunk. */
      result =
         anv_device_alloc_bo(device, "utrace-copy-buf", utrace_copies * 4096,
                             ANV_BO_ALLOC_MAPPED, 0 /* explicit_address */,
                             &flush->trace_bo);
      if (result != VK_SUCCESS)
         goto error_trace_buf;

      result =
         anv_device_alloc_bo(device, "utrace-copy-batch",
                             /* 128 dwords of setup + 64 dwords per copy */
                             align_u32(512 + 64 * utrace_copies, 4096),
                             ANV_BO_ALLOC_MAPPED, 0 /* explicit_address */,
                             &flush->batch_bo);
      if (result != VK_SUCCESS)
         goto error_batch_buf;

      result = anv_reloc_list_init(&flush->relocs, &device->vk.alloc);
      if (result != VK_SUCCESS)
         goto error_reloc_list;

      /* Point the batch at the mapped copy-batch BO. */
      flush->batch.alloc = &device->vk.alloc;
      flush->batch.relocs = &flush->relocs;
      anv_batch_set_storage(&flush->batch,
                            (struct anv_address) { .bo = flush->batch_bo, },
                            flush->batch_bo->map, flush->batch_bo->size);

      /* Emit the copies */
      anv_genX(&device->info, emit_so_memcpy_init)(&flush->memcpy_state,
                                                   device,
                                                   &flush->batch);
      for (uint32_t i = 0; i < cmd_buffer_count; i++) {
         if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
            /* One-time-submit: timestamps stay valid, flush directly. */
            u_trace_flush(&cmd_buffers[i]->trace, flush, false);
         } else {
            /* Resubmittable: clone the trace and emit timestamp copies
             * through anv_device_utrace_emit_copy_ts_buffer(). */
            u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
                                 u_trace_end_iterator(&cmd_buffers[i]->trace),
                                 &flush->ds.trace,
                                 flush,
                                 anv_device_utrace_emit_copy_ts_buffer);
         }
      }
      anv_genX(&device->info, emit_so_memcpy_fini)(&flush->memcpy_state);

      /* Presumably free_data=true hands `flush` to the trace context,
       * which deletes it via anv_utrace_delete_flush_data() once
       * processed — TODO confirm against u_trace_flush(). */
      u_trace_flush(&flush->ds.trace, flush, true);

      if (flush->batch.status != VK_SUCCESS) {
         result = flush->batch.status;
         /* NOTE(review): at this point u_trace_flush() above has already
          * been called with this flush object; unwinding here may race
          * with / double up on the deferred delete — verify. */
         goto error_batch;
      }
   } else {
      /* No copies needed: every traced buffer must be one-time-submit
       * (command_buffers_count_utraces() only adds copies otherwise). */
      for (uint32_t i = 0; i < cmd_buffer_count; i++) {
         assert(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
         u_trace_flush(&cmd_buffers[i]->trace, flush, i == (cmd_buffer_count - 1));
      }
   }

   flush->queue = queue;

   *out_flush_data = flush;

   return VK_SUCCESS;

   /* Unwind in reverse acquisition order.
    * NOTE(review): no error path calls intel_ds_flush_data_fini(&flush->ds)
    * even though intel_ds_flush_data_init() already ran — looks like a
    * leak of the flush-data internals on failure; confirm upstream. */
 error_batch:
   anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
 error_reloc_list:
   anv_device_release_bo(device, flush->batch_bo);
 error_batch_buf:
   anv_device_release_bo(device, flush->trace_bo);
 error_trace_buf:
   vk_sync_destroy(&device->vk, flush->sync);
 error_sync:
   vk_free(&device->vk.alloc, flush);
   return result;
}
|
|
|
|
static void *
|
|
anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b)
|
|
{
|
|
struct anv_device *device =
|
|
container_of(utctx, struct anv_device, ds.trace_context);
|
|
|
|
struct anv_bo *bo = NULL;
|
|
UNUSED VkResult result =
|
|
anv_device_alloc_bo(device, "utrace-ts", align_u32(size_b, 4096),
|
|
ANV_BO_ALLOC_MAPPED, 0, &bo);
|
|
assert(result == VK_SUCCESS);
|
|
|
|
return bo;
|
|
}
|
|
|
|
static void
|
|
anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
|
|
{
|
|
struct anv_device *device =
|
|
container_of(utctx, struct anv_device, ds.trace_context);
|
|
struct anv_bo *bo = timestamps;
|
|
|
|
anv_device_release_bo(device, bo);
|
|
}
|
|
|
|
static void
|
|
anv_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps, unsigned idx,
|
|
bool end_of_pipe)
|
|
{
|
|
struct anv_cmd_buffer *cmd_buffer = cs;
|
|
struct anv_device *device = cmd_buffer->device;
|
|
struct anv_bo *bo = timestamps;
|
|
|
|
device->physical->cmd_emit_timestamp(&cmd_buffer->batch, device,
|
|
(struct anv_address) {
|
|
.bo = bo,
|
|
.offset = idx * sizeof(uint64_t) },
|
|
end_of_pipe);
|
|
}
|
|
|
|
static uint64_t
|
|
anv_utrace_read_ts(struct u_trace_context *utctx,
|
|
void *timestamps, unsigned idx, void *flush_data)
|
|
{
|
|
struct anv_device *device =
|
|
container_of(utctx, struct anv_device, ds.trace_context);
|
|
struct anv_bo *bo = timestamps;
|
|
struct anv_utrace_flush_copy *flush = flush_data;
|
|
|
|
/* Only need to stall on results for the first entry: */
|
|
if (idx == 0) {
|
|
UNUSED VkResult result =
|
|
vk_sync_wait(&device->vk,
|
|
flush->sync,
|
|
0,
|
|
VK_SYNC_WAIT_COMPLETE,
|
|
os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE));
|
|
assert(result == VK_SUCCESS);
|
|
}
|
|
|
|
uint64_t *ts = bo->map;
|
|
|
|
/* Don't translate the no-timestamp marker: */
|
|
if (ts[idx] == U_TRACE_NO_TIMESTAMP)
|
|
return U_TRACE_NO_TIMESTAMP;
|
|
|
|
return intel_device_info_timebase_scale(&device->info, ts[idx]);
|
|
}
|
|
|
|
static const char *
|
|
queue_family_to_name(const struct anv_queue_family *family)
|
|
{
|
|
switch (family->engine_class) {
|
|
case I915_ENGINE_CLASS_RENDER:
|
|
return "render";
|
|
case I915_ENGINE_CLASS_COPY:
|
|
return "copy";
|
|
case I915_ENGINE_CLASS_VIDEO:
|
|
return "video";
|
|
case I915_ENGINE_CLASS_VIDEO_ENHANCE:
|
|
return "video-enh";
|
|
default:
|
|
return "unknown";
|
|
}
|
|
}
|
|
|
|
/**
 * Initialize the utrace/datasource machinery for a device: set up the
 * intel_ds device, register the timestamp-buffer callbacks with the
 * u_trace context, and register one datasource queue per anv_queue.
 *
 * Paired with anv_device_utrace_finish().
 */
void
anv_device_utrace_init(struct anv_device *device)
{
   /* local_minor - 128: presumably converts the DRM render-node minor
    * (render nodes start at 128) to a 0-based GPU index — TODO confirm
    * against intel_ds_device_init(). */
   intel_ds_device_init(&device->ds, &device->info, device->fd,
                        device->physical->local_minor - 128,
                        INTEL_DS_API_VULKAN);
   u_trace_context_init(&device->ds.trace_context,
                        &device->ds,
                        anv_utrace_create_ts_buffer,
                        anv_utrace_destroy_ts_buffer,
                        anv_utrace_record_ts,
                        anv_utrace_read_ts,
                        anv_utrace_delete_flush_data);

   /* Name each datasource queue "<engine-class><index>", e.g. "render0". */
   for (uint32_t q = 0; q < device->queue_count; q++) {
      struct anv_queue *queue = &device->queues[q];

      queue->ds =
         intel_ds_device_add_queue(&device->ds, "%s%u",
                                   queue_family_to_name(queue->family),
                                   queue->index_in_family);
   }
}
|
|
|
|
/* Tear down the utrace/datasource state created by anv_device_utrace_init(). */
void
anv_device_utrace_finish(struct anv_device *device)
{
   intel_ds_device_fini(&device->ds);
}
|
|
|
|
enum intel_ds_stall_flag
|
|
anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits)
|
|
{
|
|
static const struct {
|
|
enum anv_pipe_bits anv;
|
|
enum intel_ds_stall_flag ds;
|
|
} anv_to_ds_flags[] = {
|
|
{ .anv = ANV_PIPE_DEPTH_CACHE_FLUSH_BIT, .ds = INTEL_DS_DEPTH_CACHE_FLUSH_BIT, },
|
|
{ .anv = ANV_PIPE_DATA_CACHE_FLUSH_BIT, .ds = INTEL_DS_DATA_CACHE_FLUSH_BIT, },
|
|
{ .anv = ANV_PIPE_TILE_CACHE_FLUSH_BIT, .ds = INTEL_DS_TILE_CACHE_FLUSH_BIT, },
|
|
{ .anv = ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT, .ds = INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT, },
|
|
{ .anv = ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_STATE_CACHE_INVALIDATE_BIT, },
|
|
{ .anv = ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_CONST_CACHE_INVALIDATE_BIT, },
|
|
{ .anv = ANV_PIPE_VF_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_VF_CACHE_INVALIDATE_BIT, },
|
|
{ .anv = ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT, },
|
|
{ .anv = ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_INST_CACHE_INVALIDATE_BIT, },
|
|
{ .anv = ANV_PIPE_DEPTH_STALL_BIT, .ds = INTEL_DS_DEPTH_STALL_BIT, },
|
|
{ .anv = ANV_PIPE_CS_STALL_BIT, .ds = INTEL_DS_CS_STALL_BIT, },
|
|
{ .anv = ANV_PIPE_HDC_PIPELINE_FLUSH_BIT, .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, },
|
|
{ .anv = ANV_PIPE_STALL_AT_SCOREBOARD_BIT, .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, },
|
|
};
|
|
|
|
enum intel_ds_stall_flag ret = 0;
|
|
for (uint32_t i = 0; i < ARRAY_SIZE(anv_to_ds_flags); i++) {
|
|
if (anv_to_ds_flags[i].anv & bits)
|
|
ret |= anv_to_ds_flags[i].ds;
|
|
}
|
|
|
|
return ret;
|
|
}
|