mesa/src/nouveau/vulkan/nvk_queue_drm_nouveau.c

/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_queue.h"
#include "nvk_cmd_buffer.h"
#include "nvk_cmd_pool.h"
#include "nvk_device.h"
#include "nvk_buffer.h"
#include "nvk_image.h"
#include "nvk_device_memory.h"
#include "nvk_physical_device.h"
#include "nouveau_context.h"
#include "drm-uapi/nouveau_drm.h"
#include "vk_drm_syncobj.h"
#include <xf86drm.h>
#define NVK_PUSH_MAX_SYNCS 256
#define NVK_PUSH_MAX_BINDS 4096
#define NVK_PUSH_MAX_PUSH 1024
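
/* Accumulates everything needed for a single DRM_NOUVEAU_EXEC or
 * DRM_NOUVEAU_VM_BIND ioctl: push ranges, sparse bind ops, and the syncobjs
 * to wait on and signal.
 */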
struct push_builder {
   uint32_t max_push;
   struct drm_nouveau_sync req_wait[NVK_PUSH_MAX_SYNCS];
   struct drm_nouveau_sync req_sig[NVK_PUSH_MAX_SYNCS];
   struct drm_nouveau_exec_push req_push[NVK_PUSH_MAX_PUSH];
   struct drm_nouveau_exec req;
   struct drm_nouveau_vm_bind vmbind;
   struct drm_nouveau_vm_bind_op bind_ops[NVK_PUSH_MAX_BINDS];
   bool is_vmbind;
};
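
/* Initializes pb for either an exec (command buffer submit) or a VM bind.
 * VM binds carry no pushbuf ranges, so max_push is zero in that case.
 */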
static void
push_builder_init(struct nvk_queue *queue,
                  struct push_builder *pb,
                  bool is_vmbind)
{
   struct nvk_device *dev = nvk_queue_device(queue);

   pb->max_push = is_vmbind ? 0 :
                  MIN2(NVK_PUSH_MAX_PUSH, dev->ws_dev->max_push);
   pb->req = (struct drm_nouveau_exec) {
      .channel = queue->drm.ws_ctx->channel,
      .push_count = 0,
      .wait_count = 0,
      .sig_count = 0,
      .push_ptr = (uintptr_t)&pb->req_push,
      .wait_ptr = (uintptr_t)&pb->req_wait,
      .sig_ptr = (uintptr_t)&pb->req_sig,
   };
   pb->vmbind = (struct drm_nouveau_vm_bind) {
      .flags = DRM_NOUVEAU_VM_BIND_RUN_ASYNC,
      .op_count = 0,
      .op_ptr = (uintptr_t)&pb->bind_ops,
      .wait_count = 0,
      .sig_count = 0,
      .wait_ptr = (uintptr_t)&pb->req_wait,
      .sig_ptr = (uintptr_t)&pb->req_sig,
   };
   pb->is_vmbind = is_vmbind;
}
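
/* Adds a syncobj wait to the request.  A non-zero wait_value selects a
 * timeline syncobj wait.
 */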
static void
push_add_syncobj_wait(struct push_builder *pb,
                      uint32_t syncobj,
                      uint64_t wait_value)
{
   assert(pb->req.wait_count < NVK_PUSH_MAX_SYNCS);
   pb->req_wait[pb->req.wait_count++] = (struct drm_nouveau_sync) {
      .flags = wait_value ? DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ :
                            DRM_NOUVEAU_SYNC_SYNCOBJ,
      .handle = syncobj,
      .timeline_value = wait_value,
   };
}

static void
push_add_sync_wait(struct push_builder *pb,
                   struct vk_sync_wait *wait)
{
   struct vk_drm_syncobj *sync = vk_sync_as_drm_syncobj(wait->sync);
   assert(sync != NULL);
   push_add_syncobj_wait(pb, sync->syncobj, wait->wait_value);
}

static void
push_add_sync_signal(struct push_builder *pb,
                     struct vk_sync_signal *sig)
{
   struct vk_drm_syncobj *sync = vk_sync_as_drm_syncobj(sig->sync);
   assert(sync);
   assert(pb->req.sig_count < NVK_PUSH_MAX_SYNCS);
   pb->req_sig[pb->req.sig_count++] = (struct drm_nouveau_sync) {
      .flags = sig->signal_value ? DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ :
                                   DRM_NOUVEAU_SYNC_SYNCOBJ,
      .handle = sync->syncobj,
      .timeline_value = sig->signal_value,
   };
}
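
/* Appends a VM bind op, merging it with the previous op when the two map
 * the same BO with the same flags over contiguous VA and BO ranges.
 */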
static void
push_bind(struct push_builder *pb, const struct drm_nouveau_vm_bind_op *bind)
{
   if (pb->vmbind.op_count > 0) {
      struct drm_nouveau_vm_bind_op *prev_bind =
         &pb->bind_ops[pb->vmbind.op_count - 1];

      /* Try to coalesce bind ops together if we can */
      if (bind->op == prev_bind->op &&
          bind->flags == prev_bind->flags &&
          bind->handle == prev_bind->handle &&
          bind->addr == prev_bind->addr + prev_bind->range &&
          bind->bo_offset == prev_bind->bo_offset + prev_bind->range) {
         prev_bind->range += bind->range;
         return;
      }
   }

   assert(pb->vmbind.op_count < NVK_PUSH_MAX_BINDS);
   pb->bind_ops[pb->vmbind.op_count++] = *bind;
}
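
/* Turns a VkSparseBufferMemoryBindInfo into MAP/UNMAP ops on the buffer's
 * VA range.  A bind with no memory becomes an UNMAP.
 */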
static void
push_add_buffer_bind(struct push_builder *pb,
                     VkSparseBufferMemoryBindInfo *bind_info)
{
   VK_FROM_HANDLE(nvk_buffer, buffer, bind_info->buffer);

   for (unsigned i = 0; i < bind_info->bindCount; i++) {
      const VkSparseMemoryBind *bind = &bind_info->pBinds[i];
      VK_FROM_HANDLE(nvk_device_memory, mem, bind->memory);

      assert(bind->resourceOffset + bind->size <= buffer->vma_size_B);
      assert(!mem || bind->memoryOffset + bind->size <= mem->vk.size);

      push_bind(pb, &(struct drm_nouveau_vm_bind_op) {
         .op = mem ? DRM_NOUVEAU_VM_BIND_OP_MAP :
                     DRM_NOUVEAU_VM_BIND_OP_UNMAP,
         .handle = mem ? mem->bo->handle : 0,
         .addr = buffer->addr + bind->resourceOffset,
         .bo_offset = bind->memoryOffset,
         .range = bind->size,
      });
   }
}
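
/* Binds a region of a sparse image plane.  The region is converted from
 * pixels to tiles and then bound one contiguous row of tiles at a time.
 */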
static void
push_add_image_plane_bind(struct push_builder *pb,
                          const struct nvk_image_plane *plane,
                          const VkSparseImageMemoryBind *bind)
{
   VK_FROM_HANDLE(nvk_device_memory, mem, bind->memory);

   uint64_t image_bind_offset_B;
   const uint64_t mem_bind_offset_B = bind->memoryOffset;
   const uint32_t layer = bind->subresource.arrayLayer;
   const uint32_t level = bind->subresource.mipLevel;

   const struct nil_tiling plane_tiling = plane->nil.levels[level].tiling;
   const uint32_t tile_size_B = nil_tiling_size_B(&plane_tiling);

   const struct nil_Extent4D_Pixels bind_extent_px = {
      .width = bind->extent.width,
      .height = bind->extent.height,
      .depth = bind->extent.depth,
      .array_len = 1,
   };
   const struct nil_Offset4D_Pixels bind_offset_px = {
      .x = bind->offset.x,
      .y = bind->offset.y,
      .z = bind->offset.z,
      .a = layer,
   };

   const struct nil_Extent4D_Pixels level_extent_px =
      nil_image_level_extent_px(&plane->nil, level);
   const struct nil_Extent4D_Tiles level_extent_tl =
      nil_extent4d_px_to_tl(level_extent_px, &plane_tiling,
                            plane->nil.format,
                            plane->nil.sample_layout);

   /* Convert the extent and offset to tiles */
   const struct nil_Extent4D_Tiles bind_extent_tl =
      nil_extent4d_px_to_tl(bind_extent_px, &plane_tiling,
                            plane->nil.format,
                            plane->nil.sample_layout);
   const struct nil_Offset4D_Tiles bind_offset_tl =
      nil_offset4d_px_to_tl(bind_offset_px, &plane_tiling,
                            plane->nil.format,
                            plane->nil.sample_layout);

   image_bind_offset_B =
      nil_image_level_layer_offset_B(&plane->nil, level, layer);

   /* We can only bind contiguous ranges, so we'll split the image into rows
    * of tiles that are guaranteed to be contiguous, and bind in terms of
    * these rows
    */

   /* First, get the size of the bind. Since we have the extent in terms of
    * tiles already, we just need to multiply that by the tile size to get
    * the size in bytes
    */
   uint64_t row_bind_size_B = bind_extent_tl.width * tile_size_B;

   /* Second, start walking the binding region in units of tiles, starting
    * from the third dimension
    */
   for (uint32_t z_tl = 0; z_tl < bind_extent_tl.depth; z_tl++) {
      /* Start walking the rows to be bound */
      for (uint32_t y_tl = 0; y_tl < bind_extent_tl.height; y_tl++) {
         /* For the bind offset, get a memory offset to the start of the row
          * in terms of the bind extent
          */
         const uint64_t mem_row_start_tl =
            y_tl * bind_extent_tl.width +
            z_tl * bind_extent_tl.width * bind_extent_tl.height;

         const uint32_t image_x_tl = bind_offset_tl.x;
         const uint32_t image_y_tl = bind_offset_tl.y + y_tl;
         const uint32_t image_z_tl = bind_offset_tl.z + z_tl;

         /* The image offset is calculated in terms of the level extent */
         const uint64_t image_row_start_tl =
            image_x_tl +
            image_y_tl * level_extent_tl.width +
            image_z_tl * level_extent_tl.width * level_extent_tl.height;

         push_bind(pb, &(struct drm_nouveau_vm_bind_op) {
            .op = mem ? DRM_NOUVEAU_VM_BIND_OP_MAP :
                        DRM_NOUVEAU_VM_BIND_OP_UNMAP,
            .handle = mem ? mem->bo->handle : 0,
            .addr = plane->addr + image_bind_offset_B +
                    image_row_start_tl * tile_size_B,
            .bo_offset = mem_bind_offset_B +
                         mem_row_start_tl * tile_size_B,
            .range = row_bind_size_B,
            .flags = plane->nil.pte_kind,
         });
      }
   }
}
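
/* Handles VkSparseImageMemoryBindInfo for single-plane images */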
static void
push_add_image_bind(struct push_builder *pb,
                    VkSparseImageMemoryBindInfo *bind_info)
{
   VK_FROM_HANDLE(nvk_image, image, bind_info->image);

   /* Sparse residency with multiplane is currently not supported */
   assert(image->plane_count == 1);

   for (unsigned i = 0; i < bind_info->bindCount; i++) {
      push_add_image_plane_bind(pb, &image->planes[0],
                                &bind_info->pBinds[i]);
   }
}
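
/* Intersects an opaque bind with one plane of the image.  The plane is
 * assumed to occupy size_B bytes at the current (aligned) value of
 * *image_plane_offset_B_iter, which is advanced past the plane.  Returns
 * false if the bind does not overlap the plane; otherwise returns the
 * overlap as a plane offset, memory offset, and size.
 */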
static bool
next_opaque_bind_plane(const VkSparseMemoryBind *bind,
                       uint64_t size_B, uint32_t align_B,
                       uint64_t *plane_offset_B,
                       uint64_t *mem_offset_B,
                       uint64_t *bind_size_B,
                       uint64_t *image_plane_offset_B_iter)
{
   /* Figure out the offset to this plane and increment _iter up-front so
    * that we're free to early return elsewhere in the function.
    */
   *image_plane_offset_B_iter = align64(*image_plane_offset_B_iter, align_B);
   const uint64_t image_plane_offset_B = *image_plane_offset_B_iter;
   *image_plane_offset_B_iter += size_B;

   /* Offset into the image or image mip tail, as appropriate */
   uint64_t bind_offset_B = bind->resourceOffset;
   if (bind_offset_B >= NVK_MIP_TAIL_START_OFFSET)
      bind_offset_B -= NVK_MIP_TAIL_START_OFFSET;

   if (bind_offset_B < image_plane_offset_B) {
      /* The offset of the plane within the bind */
      const uint64_t bind_plane_offset_B =
         image_plane_offset_B - bind_offset_B;

      /* If this plane lies above the bound range, skip this plane */
      if (bind_plane_offset_B >= bind->size)
         return false;

      *plane_offset_B = 0;
      *mem_offset_B = bind->memoryOffset + bind_plane_offset_B;
      *bind_size_B = MIN2(bind->size - bind_plane_offset_B, size_B);
   } else {
      /* The offset of the bind within the plane */
      const uint64_t plane_bind_offset_B =
         bind_offset_B - image_plane_offset_B;

      /* If this plane lies below the bound range, skip this plane */
      if (plane_bind_offset_B >= size_B)
         return false;

      *plane_offset_B = plane_bind_offset_B;
      *mem_offset_B = bind->memoryOffset;
      *bind_size_B = MIN2(bind->size, size_B - plane_bind_offset_B);
   }

   return true;
}
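
/* Binds the portion of an opaque bind that overlaps this plane */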
static void
push_add_image_plane_opaque_bind(struct push_builder *pb,
                                 const struct nvk_image_plane *plane,
                                 const VkSparseMemoryBind *bind,
                                 uint64_t *image_plane_offset_B)
{
   uint64_t plane_offset_B, mem_offset_B, bind_size_B;
   if (!next_opaque_bind_plane(bind, plane->nil.size_B, plane->nil.align_B,
                               &plane_offset_B, &mem_offset_B, &bind_size_B,
                               image_plane_offset_B))
      return;

   VK_FROM_HANDLE(nvk_device_memory, mem, bind->memory);

   assert(plane->vma_size_B == plane->nil.size_B);
   assert(plane_offset_B + bind_size_B <= plane->vma_size_B);
   assert(!mem || mem_offset_B + bind_size_B <= mem->vk.size);

   push_bind(pb, &(struct drm_nouveau_vm_bind_op) {
      .op = mem ? DRM_NOUVEAU_VM_BIND_OP_MAP :
                  DRM_NOUVEAU_VM_BIND_OP_UNMAP,
      .handle = mem ? mem->bo->handle : 0,
      .addr = plane->addr + plane_offset_B,
      .bo_offset = mem_offset_B,
      .range = bind_size_B,
      .flags = plane->nil.pte_kind,
   });
}
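
/* Binds the portion of an opaque bind that overlaps the plane's mip tail.
 * The mip tail of each array slice is handled separately because slices
 * are array_stride_B apart in the image while the bound range treats the
 * per-slice mip tails as packed back-to-back.
 */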
static void
push_add_image_plane_mip_tail_bind(struct push_builder *pb,
                                   const struct nvk_image_plane *plane,
                                   const VkSparseMemoryBind *bind,
                                   uint64_t *image_plane_offset_B)
{
   const uint64_t mip_tail_offset_B =
      nil_image_mip_tail_offset_B(&plane->nil);
   const uint64_t mip_tail_size_B =
      nil_image_mip_tail_size_B(&plane->nil);
   const uint64_t mip_tail_stride_B = plane->nil.array_stride_B;
   const uint64_t whole_mip_tail_size_B =
      mip_tail_size_B * plane->nil.extent_px.array_len;

   uint64_t plane_offset_B, mem_offset_B, bind_size_B;
   if (!next_opaque_bind_plane(bind, whole_mip_tail_size_B, plane->nil.align_B,
                               &plane_offset_B, &mem_offset_B, &bind_size_B,
                               image_plane_offset_B))
      return;

   VK_FROM_HANDLE(nvk_device_memory, mem, bind->memory);

   /* Range within the virtual mip_tail space */
   const uint64_t mip_bind_start_B = plane_offset_B;
   const uint64_t mip_bind_end_B = mip_bind_start_B + bind_size_B;

   /* Range of array slices covered by this bind */
   const uint32_t start_a = mip_bind_start_B / mip_tail_size_B;
   const uint32_t end_a = DIV_ROUND_UP(mip_bind_end_B, mip_tail_size_B);

   for (uint32_t a = start_a; a < end_a; a++) {
      /* Range within the virtual mip_tail space of this array slice */
      const uint64_t a_mip_bind_start_B =
         MAX2(a * mip_tail_size_B, mip_bind_start_B);
      const uint64_t a_mip_bind_end_B =
         MIN2((a + 1) * mip_tail_size_B, mip_bind_end_B);

      /* Offset and range within this mip_tail slice */
      const uint64_t a_offset_B = a_mip_bind_start_B - a * mip_tail_size_B;
      const uint64_t a_range_B = a_mip_bind_end_B - a_mip_bind_start_B;

      /* Offset within the current bind operation */
      const uint64_t a_bind_offset_B =
         a_mip_bind_start_B - mip_bind_start_B;

      /* Offset within the image */
      const uint64_t a_image_offset_B =
         mip_tail_offset_B + (a * mip_tail_stride_B) + a_offset_B;

      push_bind(pb, &(struct drm_nouveau_vm_bind_op) {
         .op = mem ? DRM_NOUVEAU_VM_BIND_OP_MAP :
                     DRM_NOUVEAU_VM_BIND_OP_UNMAP,
         .handle = mem ? mem->bo->handle : 0,
         .addr = plane->addr + a_image_offset_B,
         .bo_offset = mem_offset_B + a_bind_offset_B,
         .range = a_range_B,
         .flags = plane->nil.pte_kind,
      });
   }
}
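
/* Walks each plane of the image (plus the stencil copy temp, if any) and
 * binds whatever part of the opaque bind lands on it.  Resource offsets at
 * or above NVK_MIP_TAIL_START_OFFSET address the mip tail.
 */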
static void
push_add_image_opaque_bind(struct push_builder *pb,
                           VkSparseImageOpaqueMemoryBindInfo *bind_info)
{
   VK_FROM_HANDLE(nvk_image, image, bind_info->image);

   for (unsigned i = 0; i < bind_info->bindCount; i++) {
      const VkSparseMemoryBind *bind = &bind_info->pBinds[i];

      uint64_t image_plane_offset_B = 0;
      for (unsigned plane = 0; plane < image->plane_count; plane++) {
         if (bind->resourceOffset >= NVK_MIP_TAIL_START_OFFSET) {
            push_add_image_plane_mip_tail_bind(pb, &image->planes[plane],
                                               bind, &image_plane_offset_B);
         } else {
            push_add_image_plane_opaque_bind(pb, &image->planes[plane],
                                             bind, &image_plane_offset_B);
         }
      }

      if (image->stencil_copy_temp.nil.size_B > 0) {
         push_add_image_plane_opaque_bind(pb, &image->stencil_copy_temp,
                                          bind, &image_plane_offset_B);
      }
   }
}
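
/* Appends one pushbuf range to the exec request */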
static void
push_add_push(struct push_builder *pb, uint64_t addr, uint32_t range,
              bool no_prefetch)
{
   /* This is the hardware limit on all current GPUs */
   assert((addr % 4) == 0 && (range % 4) == 0);
   assert(range < (1u << 23));

   uint32_t flags = 0;
   if (no_prefetch)
      flags |= DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;

   assert(pb->req.push_count < pb->max_push);
   pb->req_push[pb->req.push_count++] = (struct drm_nouveau_exec_push) {
      .va = addr,
      .va_len = range,
      .flags = flags,
   };
}
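
/* Submits the accumulated bind ops as a single DRM_NOUVEAU_VM_BIND ioctl,
 * reusing the wait and signal arrays set up for the exec request.
 */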
static VkResult
bind_submit(struct nvk_queue *queue, struct push_builder *pb, bool sync)
{
   struct nvk_device *dev = nvk_queue_device(queue);
   int err;

   pb->vmbind.wait_count = pb->req.wait_count;
   pb->vmbind.sig_count = pb->req.sig_count;
   err = drmCommandWriteRead(dev->ws_dev->fd,
                             DRM_NOUVEAU_VM_BIND,
                             &pb->vmbind, sizeof(pb->vmbind));
   if (err) {
      return vk_errorf(queue, VK_ERROR_UNKNOWN,
                       "DRM_NOUVEAU_VM_BIND failed: %m");
   }

   return VK_SUCCESS;
}
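
/* Submits the accumulated pushes with DRM_NOUVEAU_EXEC.  When sync is set,
 * this also waits on the queue's syncobj and then submits an empty exec to
 * check for device errors.
 */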
static VkResult
push_submit(struct nvk_queue *queue, struct push_builder *pb, bool sync)
{
   struct nvk_device *dev = nvk_queue_device(queue);
   int err;

   if (sync) {
      assert(pb->req.sig_count < NVK_PUSH_MAX_SYNCS);
      pb->req_sig[pb->req.sig_count++] = (struct drm_nouveau_sync) {
         .flags = DRM_NOUVEAU_SYNC_SYNCOBJ,
         .handle = queue->drm.syncobj,
         .timeline_value = 0,
      };
   }

   err = drmCommandWriteRead(dev->ws_dev->fd,
                             DRM_NOUVEAU_EXEC,
                             &pb->req, sizeof(pb->req));
   if (err) {
      VkResult result = VK_ERROR_UNKNOWN;
      if (err == -ENODEV)
         result = VK_ERROR_DEVICE_LOST;
      return vk_errorf(queue, result,
                       "DRM_NOUVEAU_EXEC failed: %m");
   }

   if (sync) {
      err = drmSyncobjWait(dev->ws_dev->fd,
                           &queue->drm.syncobj, 1, INT64_MAX,
                           DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
                           NULL);
      if (err) {
         return vk_errorf(queue, VK_ERROR_UNKNOWN,
                          "DRM_SYNCOBJ_WAIT failed: %m");
      }

      /* Push an empty exec again, just to check for errors */
      struct drm_nouveau_exec empty = {
         .channel = pb->req.channel,
      };
      err = drmCommandWriteRead(dev->ws_dev->fd,
                                DRM_NOUVEAU_EXEC,
                                &empty, sizeof(empty));
      if (err) {
         return vk_errorf(queue, VK_ERROR_DEVICE_LOST,
                          "DRM_NOUVEAU_EXEC failed: %m");
      }
   }

   return VK_SUCCESS;
}
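
/* Creates the nouveau context for this queue, selecting engines based on
 * the queue's capabilities, plus a syncobj used for synchronous submits.
 */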
VkResult
nvk_queue_init_drm_nouveau(struct nvk_device *dev,
                           struct nvk_queue *queue,
                           VkQueueFlags queue_flags)
{
   VkResult result;
   int err;

   enum nouveau_ws_engines engines = 0;
   if (queue_flags & VK_QUEUE_GRAPHICS_BIT)
      engines |= NOUVEAU_WS_ENGINE_3D;
   if (queue_flags & VK_QUEUE_COMPUTE_BIT)
      engines |= NOUVEAU_WS_ENGINE_COMPUTE;
   if (queue_flags & VK_QUEUE_TRANSFER_BIT)
      engines |= NOUVEAU_WS_ENGINE_COPY;

   err = nouveau_ws_context_create(dev->ws_dev, engines, &queue->drm.ws_ctx);
   if (err != 0) {
      if (err == -ENOSPC)
         return vk_error(dev, VK_ERROR_TOO_MANY_OBJECTS);
      else
         return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   err = drmSyncobjCreate(dev->ws_dev->fd, 0, &queue->drm.syncobj);
   if (err < 0) {
      result = vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_context;
   }

   return VK_SUCCESS;

fail_context:
   nouveau_ws_context_destroy(queue->drm.ws_ctx);

   return result;
}

void
nvk_queue_finish_drm_nouveau(struct nvk_device *dev,
                             struct nvk_queue *queue)
{
   ASSERTED int err = drmSyncobjDestroy(dev->ws_dev->fd, queue->drm.syncobj);
   assert(err == 0);
   nouveau_ws_context_destroy(queue->drm.ws_ctx);
}
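
/* Submits a single push buffer and waits for it to complete */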
VkResult
nvk_queue_submit_simple_drm_nouveau(struct nvk_queue *queue,
                                    uint32_t push_dw_count,
                                    struct nouveau_ws_bo *push_bo,
                                    uint32_t extra_bo_count,
                                    struct nouveau_ws_bo **extra_bos)
{
   struct push_builder pb;
   push_builder_init(queue, &pb, false);
   push_add_push(&pb, push_bo->offset, push_dw_count * 4, false);
   return push_submit(queue, &pb, true);
}

static void
push_add_queue_state(struct push_builder *pb, struct nvk_queue_state *qs)
{
   if (qs->push.bo)
      push_add_push(pb, qs->push.bo->offset, qs->push.dw_count * 4, false);
}
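
/* Main submit entry point.  Sparse binds (buffer, image, and opaque image
 * binds) go through DRM_NOUVEAU_VM_BIND; command buffers go through
 * DRM_NOUVEAU_EXEC, splitting into multiple execs when a submit has more
 * pushes than a single exec request can hold.
 */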
VkResult
nvk_queue_submit_drm_nouveau(struct nvk_queue *queue,
                             struct vk_queue_submit *submit,
                             bool sync)
{
   struct nvk_device *dev = nvk_queue_device(queue);
   struct push_builder pb;
   VkResult result;

   uint64_t upload_time_point;
   result = nvk_upload_queue_flush(dev, &dev->upload, &upload_time_point);
   if (result != VK_SUCCESS)
      return result;

   const bool is_vmbind = submit->buffer_bind_count > 0 ||
                          submit->image_bind_count > 0 ||
                          submit->image_opaque_bind_count > 0;
   push_builder_init(queue, &pb, is_vmbind);

   if (!is_vmbind && upload_time_point > 0)
      push_add_syncobj_wait(&pb, dev->upload.drm.syncobj, upload_time_point);

   for (uint32_t i = 0; i < submit->wait_count; i++)
      push_add_sync_wait(&pb, &submit->waits[i]);

   if (is_vmbind) {
      assert(submit->command_buffer_count == 0);

      for (uint32_t i = 0; i < submit->buffer_bind_count; i++)
         push_add_buffer_bind(&pb, &submit->buffer_binds[i]);

      for (uint32_t i = 0; i < submit->image_bind_count; i++)
         push_add_image_bind(&pb, &submit->image_binds[i]);

      for (uint32_t i = 0; i < submit->image_opaque_bind_count; i++)
         push_add_image_opaque_bind(&pb, &submit->image_opaque_binds[i]);
   } else if (submit->command_buffer_count > 0) {
      assert(submit->buffer_bind_count == 0);
      assert(submit->image_bind_count == 0);
      assert(submit->image_opaque_bind_count == 0);

      push_add_queue_state(&pb, &queue->state);

      for (unsigned i = 0; i < submit->command_buffer_count; i++) {
         struct nvk_cmd_buffer *cmd =
            container_of(submit->command_buffers[i], struct nvk_cmd_buffer, vk);

         util_dynarray_foreach(&cmd->pushes, struct nvk_cmd_push, push) {
            if (push->range == 0)
               continue;

            if (pb.req.push_count >= pb.max_push) {
               result = push_submit(queue, &pb, sync);
               if (result != VK_SUCCESS)
                  return result;

               push_builder_init(queue, &pb, is_vmbind);
            }

            push_add_push(&pb, push->addr, push->range, push->no_prefetch);
         }
      }
   }

   for (uint32_t i = 0; i < submit->signal_count; i++)
      push_add_sync_signal(&pb, &submit->signals[i]);

   if (is_vmbind)
      return bind_submit(queue, &pb, sync);
   else
      return push_submit(queue, &pb, sync);
}