v3dv: support submits without a command buffer

It is valid to submit with an empty list of command buffers; however,
we still need to wait on the pWaitSemaphores provided and only signal
the pSignalSemaphores and fence once we have finished waiting on them,
to honor the semantics of the submission.
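For reference, this is the kind of client-side submission the change
covers: a minimal sketch using the standard Vulkan API, where queue,
wait_sem, signal_sem and fence are hypothetical client-created handles.

/* An empty submit: zero command buffers, but the driver must still
 * wait on wait_sem before signaling signal_sem and fence.
 */
VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
VkSubmitInfo submit_info = {
   .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
   .waitSemaphoreCount = 1,
   .pWaitSemaphores = &wait_sem,
   .pWaitDstStageMask = &wait_stage,
   .commandBufferCount = 0,
   .signalSemaphoreCount = 1,
   .pSignalSemaphores = &signal_sem,
};
vkQueueSubmit(queue, 1, &submit_info, fence);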

Because waiting and signaling happen in the kernel, the easiest way
to do this is to submit a trivial no-op job to the GPU. To do this,
we refactor some of our code so that code that used to operate on a
command buffer operates on a job instead, which lets us reuse most of
our infrastructure to create the no-op job.
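In essence, the refactor replaces the indirection through the command
buffer with a device pointer stored on the job itself, so the same
helpers also work for driver-created jobs that have no command buffer
(a sketch of the pattern, taken from the changes below):

/* Before: helpers reached the device through the command buffer,
 * which a driver-created no-op job does not have. */
v3dv_bo_free(job->cmd_buffer->device, bo);

/* After: the job carries its own device pointer (set in v3dv_job_init,
 * which accepts cmd_buffer == NULL for internal jobs). */
v3dv_bo_free(job->device, bo);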

Additionally, because no-op jobs are created internally by the driver,
we are responsible for destroying them too. For this, we bind a fence
to each no-op job we submit and test in-flight no-op jobs for
completion (destroying those that have completed) every time
vkQueueSubmit is called.
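The completion test is a non-blocking poll on each job's fence syncobj:
with a zero timeout, drmSyncobjWait() returns immediately, and returns 0
only if the syncobj is already signaled. A simplified sketch of the
pattern (the real code below also destroys the fence):

/* Poll in-flight no-op jobs without blocking vkQueueSubmit. */
list_for_each_entry_safe(struct v3dv_job, job,
                         &queue->noop_jobs, list_link) {
   if (drmSyncobjWait(device->render_fd, &job->fence->sync, 1,
                      0 /* timeout */, 0 /* flags */, NULL) == 0)
      v3dv_job_destroy(job);
}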

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
Iago Toral Quiroga 2020-03-13 11:35:06 +01:00 committed by Marge Bot
parent d9152cb949
commit 8ed2e53e0d
7 changed files with 346 additions and 103 deletions

View File

@ -54,7 +54,7 @@ v3dv_cl_destroy(struct v3dv_cl *cl)
{
if (cl->bo) {
assert(cl->job);
v3dv_bo_free(cl->job->cmd_buffer->device, cl->bo);
v3dv_bo_free(cl->job->device, cl->bo);
}
/* Leave the CL in a reset state to catch use-after-destroy instances */
@ -71,7 +71,7 @@ v3dv_cl_ensure_space(struct v3dv_cl *cl, uint32_t space, uint32_t alignment)
return offset;
}
struct v3dv_bo *bo = v3dv_bo_alloc(cl->job->cmd_buffer->device, space, "CL");
struct v3dv_bo *bo = v3dv_bo_alloc(cl->job->device, space, "CL");
if (!bo) {
fprintf(stderr, "failed to allocate memory for command list");
abort();
@ -79,7 +79,7 @@ v3dv_cl_ensure_space(struct v3dv_cl *cl, uint32_t space, uint32_t alignment)
v3dv_job_add_bo(cl->job, bo);
bool ok = v3dv_bo_map(cl->job->cmd_buffer->device, bo, bo->size);
bool ok = v3dv_bo_map(cl->job->device, bo, bo->size);
if (!ok) {
fprintf(stderr, "failed to map command list buffer");
abort();
@ -100,7 +100,7 @@ v3dv_cl_ensure_space_with_branch(struct v3dv_cl *cl, uint32_t space)
if (v3dv_cl_offset(cl) + space + cl_packet_length(BRANCH) <= cl->size)
return;
struct v3dv_bo *bo = v3dv_bo_alloc(cl->job->cmd_buffer->device, space, "CL");
struct v3dv_bo *bo = v3dv_bo_alloc(cl->job->device, space, "CL");
if (!bo) {
fprintf(stderr, "failed to allocate memory for command list");
abort();
@ -115,7 +115,7 @@ v3dv_cl_ensure_space_with_branch(struct v3dv_cl *cl, uint32_t space)
v3dv_job_add_bo(cl->job, bo);
bool ok = v3dv_bo_map(cl->job->cmd_buffer->device, bo, bo->size);
bool ok = v3dv_bo_map(cl->job->device, bo, bo->size);
if (!ok) {
fprintf(stderr, "failed to map command list buffer");
abort();

View File

@ -156,8 +156,8 @@ cmd_buffer_create(struct v3dv_device *device,
return VK_SUCCESS;
}
static void
job_destroy(struct v3dv_job *job)
void
v3dv_job_destroy(struct v3dv_job *job)
{
assert(job);
@ -180,14 +180,14 @@ job_destroy(struct v3dv_job *job)
set_foreach(job->extra_bos, entry) {
struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
v3dv_bo_free(job->cmd_buffer->device, bo);
v3dv_bo_free(job->device, bo);
}
_mesa_set_destroy(job->extra_bos, NULL);
v3dv_bo_free(job->cmd_buffer->device, job->tile_alloc);
v3dv_bo_free(job->cmd_buffer->device, job->tile_state);
v3dv_bo_free(job->device, job->tile_alloc);
v3dv_bo_free(job->device, job->tile_state);
vk_free(&job->cmd_buffer->device->alloc, job);
vk_free(&job->device->alloc, job);
}
static void
@ -197,11 +197,11 @@ cmd_buffer_free_resources(struct v3dv_cmd_buffer *cmd_buffer)
list_for_each_entry_safe(struct v3dv_job, job,
&cmd_buffer->submit_jobs, list_link) {
job_destroy(job);
v3dv_job_destroy(job);
}
if (cmd_buffer->state.job)
job_destroy(cmd_buffer->state.job);
v3dv_job_destroy(cmd_buffer->state.job);
if (cmd_buffer->state.attachments) {
assert(cmd_buffer->state.attachment_count > 0);
@ -373,14 +373,13 @@ job_compute_frame_tiling(struct v3dv_job *job,
}
void
v3dv_cmd_buffer_start_frame(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t width,
uint32_t height,
uint32_t layers,
uint32_t render_target_count,
uint8_t max_internal_bpp)
v3dv_job_start_frame(struct v3dv_job *job,
uint32_t width,
uint32_t height,
uint32_t layers,
uint32_t render_target_count,
uint8_t max_internal_bpp)
{
struct v3dv_job *job = cmd_buffer->state.job;
assert(job);
/* Start by computing frame tiling spec for this job */
@ -413,7 +412,7 @@ v3dv_cmd_buffer_start_frame(struct v3dv_cmd_buffer *cmd_buffer,
*/
tile_alloc_size += 512 * 1024;
job->tile_alloc = v3dv_bo_alloc(cmd_buffer->device, tile_alloc_size,
job->tile_alloc = v3dv_bo_alloc(job->device, tile_alloc_size,
"tile_alloc");
v3dv_job_add_bo(job, job->tile_alloc);
@ -422,7 +421,7 @@ v3dv_cmd_buffer_start_frame(struct v3dv_cmd_buffer *cmd_buffer,
tiling->draw_tiles_x *
tiling->draw_tiles_y *
tsda_per_tile_size;
job->tile_state = v3dv_bo_alloc(cmd_buffer->device, tile_state_size, "TSDA");
job->tile_state = v3dv_bo_alloc(job->device, tile_state_size, "TSDA");
v3dv_job_add_bo(job, job->tile_state);
/* This must go before the binning mode configuration. It is
@ -483,6 +482,41 @@ v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer)
cmd_buffer->state.job = NULL;
}
void
v3dv_job_init(struct v3dv_job *job,
struct v3dv_device *device,
struct v3dv_cmd_buffer *cmd_buffer,
int32_t subpass_idx)
{
assert(job);
job->device = device;
job->cmd_buffer = cmd_buffer;
job->bos =
_mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
job->bo_count = 0;
job->extra_bos =
_mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
v3dv_cl_init(job, &job->bcl);
v3dv_cl_begin(&job->bcl);
v3dv_cl_init(job, &job->rcl);
v3dv_cl_begin(&job->rcl);
v3dv_cl_init(job, &job->indirect);
v3dv_cl_begin(&job->indirect);
/* Keep track of the first subpass that we are recording in this new job.
* We will use this when we emit the RCL to decide how to emit our loads
* and stores.
*/
if (cmd_buffer && cmd_buffer->state.pass)
job->first_subpass = subpass_idx;
}
struct v3dv_job *
v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
int32_t subpass_idx)
@ -514,30 +548,7 @@ v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
return NULL;
}
job->cmd_buffer = cmd_buffer;
job->bos =
_mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
job->bo_count = 0;
job->extra_bos =
_mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
v3dv_cl_init(job, &job->bcl);
v3dv_cl_begin(&job->bcl);
v3dv_cl_init(job, &job->rcl);
v3dv_cl_begin(&job->rcl);
v3dv_cl_init(job, &job->indirect);
v3dv_cl_begin(&job->indirect);
/* Keep track of the first subpass that we are recording in this new job.
* We will use this when we emit the RCL to decide how to emit our loads
* and stores.
*/
if (cmd_buffer->state.pass)
job->first_subpass = subpass_idx;
v3dv_job_init(job, cmd_buffer->device, cmd_buffer, subpass_idx);
return job;
}
@ -1506,12 +1517,12 @@ subpass_start(struct v3dv_cmd_buffer *cmd_buffer, uint32_t subpass_idx)
const uint8_t internal_bpp =
v3dv_framebuffer_compute_internal_bpp(framebuffer, subpass);
v3dv_cmd_buffer_start_frame(cmd_buffer,
framebuffer->width,
framebuffer->height,
framebuffer->layers,
subpass->color_count,
internal_bpp);
v3dv_job_start_frame(job,
framebuffer->width,
framebuffer->height,
framebuffer->layers,
subpass->color_count,
internal_bpp);
}
/* If we don't have a scissor or viewport defined let's just use the render

View File

@ -1026,12 +1026,17 @@ queue_init(struct v3dv_device *device, struct v3dv_queue *queue)
queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
queue->device = device;
queue->flags = 0;
list_inithead(&queue->noop_jobs);
return VK_SUCCESS;
}
static void
queue_finish(struct v3dv_queue *queue)
{
/* We wait for the device to be idle before finishing the queue, so
 * by now all in-flight no-op jobs should have completed and this
 * should destroy them all.
 */
v3dv_queue_destroy_completed_noop_jobs(queue);
}
static void
@ -1169,8 +1174,9 @@ v3dv_DestroyDevice(VkDevice _device,
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
drmSyncobjDestroy(device->render_fd, device->last_job_sync);
v3dv_DeviceWaitIdle(_device);
queue_finish(&device->queue);
drmSyncobjDestroy(device->render_fd, device->last_job_sync);
vk_free2(&default_alloc, pAllocator, device);
}

View File

@ -262,12 +262,12 @@ emit_tlb_clear_job(struct v3dv_cmd_buffer *cmd_buffer,
const uint8_t internal_bpp =
v3dv_framebuffer_compute_internal_bpp(framebuffer, subpass);
v3dv_cmd_buffer_start_frame(cmd_buffer,
framebuffer->width,
framebuffer->height,
framebuffer->layers,
color_attachment_count,
internal_bpp);
v3dv_job_start_frame(job,
framebuffer->width,
framebuffer->height,
framebuffer->layers,
color_attachment_count,
internal_bpp);
struct v3dv_cl *rcl = &job->rcl;
v3dv_cl_ensure_space_with_branch(rcl, 200 +
@ -427,12 +427,12 @@ emit_tlb_clear(struct v3dv_cmd_buffer *cmd_buffer,
*/
job = v3dv_cmd_buffer_start_job(cmd_buffer, false);
v3dv_cmd_buffer_start_frame(cmd_buffer,
subpass_tiling.width,
subpass_tiling.height,
subpass_tiling.layers,
subpass_tiling.render_target_count,
subpass_tiling.internal_bpp);
v3dv_job_start_frame(job,
subpass_tiling.width,
subpass_tiling.height,
subpass_tiling.layers,
subpass_tiling.render_target_count,
subpass_tiling.internal_bpp);
job->is_subpass_continue = true;
}

View File

@ -647,10 +647,10 @@ copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
if (!job)
return;
v3dv_cmd_buffer_start_frame(cmd_buffer,
region->imageExtent.width,
region->imageExtent.height,
num_layers, 1, internal_bpp);
v3dv_job_start_frame(job,
region->imageExtent.width,
region->imageExtent.height,
num_layers, 1, internal_bpp);
struct framebuffer_data framebuffer;
setup_framebuffer_data(&framebuffer, internal_type, &job->frame_tiling);
@ -790,10 +790,10 @@ copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
if (!job)
return;
v3dv_cmd_buffer_start_frame(cmd_buffer,
region->extent.width,
region->extent.height,
num_layers, 1, internal_bpp);
v3dv_job_start_frame(job,
region->extent.width,
region->extent.height,
num_layers, 1, internal_bpp);
struct framebuffer_data framebuffer;
setup_framebuffer_data(&framebuffer, internal_type, &job->frame_tiling);
@ -935,8 +935,7 @@ clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
if (!job)
return;
v3dv_cmd_buffer_start_frame(cmd_buffer, width, height, 1,
1, internal_bpp);
v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp);
struct framebuffer_data framebuffer;
setup_framebuffer_data(&framebuffer, internal_type, &job->frame_tiling);
@ -1143,8 +1142,7 @@ copy_buffer(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t width, height;
framebuffer_size_for_pixel_count(num_items, &width, &height);
v3dv_cmd_buffer_start_frame(cmd_buffer, width, height, 1,
1, internal_bpp);
v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp);
struct framebuffer_data framebuffer;
setup_framebuffer_data(&framebuffer, internal_type, &job->frame_tiling);
@ -1315,7 +1313,7 @@ fill_buffer(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t width, height;
framebuffer_size_for_pixel_count(num_items, &width, &height);
v3dv_cmd_buffer_start_frame(cmd_buffer, width, height, 1, 1, internal_bpp);
v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp);
struct framebuffer_data framebuffer;
setup_framebuffer_data(&framebuffer, internal_type, &job->frame_tiling);
@ -1502,10 +1500,10 @@ copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
if (!job)
return;
v3dv_cmd_buffer_start_frame(cmd_buffer,
region->imageExtent.width,
region->imageExtent.height,
num_layers, 1, internal_bpp);
v3dv_job_start_frame(job,
region->imageExtent.width,
region->imageExtent.height,
num_layers, 1, internal_bpp);
struct framebuffer_data framebuffer;
setup_framebuffer_data(&framebuffer, internal_type, &job->frame_tiling);

View File

@ -202,9 +202,16 @@ struct v3dv_queue {
VkDeviceQueueCreateFlags flags;
/* FIXME: stub */
/* When the client submits to the queue without a command buffer the queue
* needs to create and submit a no-op job and it is then responsible for
* destroying it once it has completed execution. This list keeps references
* to all no-op jobs in flight so we can do that.
*/
struct list_head noop_jobs;
};
void v3dv_queue_destroy_completed_noop_jobs(struct v3dv_queue *queue);
struct v3dv_device {
VK_LOADER_DATA _loader_data;
@ -536,6 +543,8 @@ enum v3dv_ez_state {
struct v3dv_job {
struct list_head list_link;
struct v3dv_device *device;
struct v3dv_cmd_buffer *cmd_buffer;
struct v3dv_cl bcl;
@ -573,11 +582,30 @@ struct v3dv_job {
enum v3dv_ez_state ez_state;
enum v3dv_ez_state first_ez_state;
/* Typically, the client is responsible for handling the lifetime of
 * command buffers, using fences to tell when they are no longer in
 * use by the GPU. However, when the jobs submitted to the GPU are
 * created internally by the driver (for example, when we need to
 * submit no-op jobs), it is our responsibility to do that.
 */
struct v3dv_fence *fence;
};
void v3dv_job_init(struct v3dv_job *job,
struct v3dv_device *device,
struct v3dv_cmd_buffer *cmd_buffer,
int32_t subpass_idx);
void v3dv_job_destroy(struct v3dv_job *job);
void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
void v3dv_job_add_extra_bo(struct v3dv_job *job, struct v3dv_bo *bo);
void v3dv_job_emit_binning_flush(struct v3dv_job *job);
void v3dv_job_start_frame(struct v3dv_job *job,
uint32_t width,
uint32_t height,
uint32_t layers,
uint32_t render_target_count,
uint8_t max_internal_bpp);
struct v3dv_vertex_binding {
struct v3dv_buffer *buffer;
@ -647,12 +675,6 @@ struct v3dv_cmd_buffer {
struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
int32_t subpass_idx);
void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);
void v3dv_cmd_buffer_start_frame(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t width,
uint32_t height,
uint32_t layers,
uint32_t render_target_count,
uint8_t max_internal_bpp);
void v3dv_render_pass_setup_render_target(struct v3dv_cmd_buffer *cmd_buffer,
int rt,

View File

@ -132,7 +132,7 @@ process_fence_to_signal(struct v3dv_device *device, VkFence _fence)
}
static VkResult
queue_submit_job(struct v3dv_job *job, bool do_wait)
queue_submit_job(struct v3dv_queue *queue, struct v3dv_job *job, bool do_wait)
{
assert(job);
@ -150,10 +150,10 @@ queue_submit_job(struct v3dv_job *job, bool do_wait)
* we have more than one semaphore to wait on.
*/
submit.in_sync_bcl = 0;
submit.in_sync_rcl = do_wait ? job->cmd_buffer->device->last_job_sync : 0;
submit.in_sync_rcl = do_wait ? queue->device->last_job_sync : 0;
/* Update the sync object for the last rendering by this device. */
submit.out_sync = job->cmd_buffer->device->last_job_sync;
submit.out_sync = queue->device->last_job_sync;
submit.bcl_start = job->bcl.bo->offset;
submit.bcl_end = job->bcl.bo->offset + v3dv_cl_offset(&job->bcl);
@ -182,10 +182,10 @@ queue_submit_job(struct v3dv_job *job, bool do_wait)
assert(bo_idx == submit.bo_handle_count);
submit.bo_handles = (uintptr_t)(void *)bo_handles;
struct v3dv_device *device = job->cmd_buffer->device;
v3dv_clif_dump(device, job, &submit);
v3dv_clif_dump(queue->device, job, &submit);
int ret = v3dv_ioctl(device->render_fd, DRM_IOCTL_V3D_SUBMIT_CL, &submit);
int ret = v3dv_ioctl(queue->device->render_fd,
DRM_IOCTL_V3D_SUBMIT_CL, &submit);
static bool warned = false;
if (ret && !warned) {
fprintf(stderr, "Draw call returned %s. Expect corruption.\n",
@ -202,12 +202,14 @@ queue_submit_job(struct v3dv_job *job, bool do_wait)
}
static VkResult
queue_submit_cmd_buffer(struct v3dv_cmd_buffer *cmd_buffer,
queue_submit_cmd_buffer(struct v3dv_queue *queue,
struct v3dv_cmd_buffer *cmd_buffer,
const VkSubmitInfo *pSubmit)
{
list_for_each_entry_safe(struct v3dv_job, job,
&cmd_buffer->submit_jobs, list_link) {
VkResult result = queue_submit_job(job, pSubmit->waitSemaphoreCount > 0);
VkResult result = queue_submit_job(queue, job,
pSubmit->waitSemaphoreCount > 0);
if (result != VK_SUCCESS)
return result;
}
@ -215,20 +217,222 @@ queue_submit_cmd_buffer(struct v3dv_cmd_buffer *cmd_buffer,
return VK_SUCCESS;
}
static void
emit_noop_bin(struct v3dv_job *job)
{
v3dv_job_start_frame(job, 1, 1, 1, 1, V3D_INTERNAL_BPP_32);
v3dv_job_emit_binning_flush(job);
}
static void
emit_noop_render(struct v3dv_job *job)
{
struct v3dv_cl *rcl = &job->rcl;
v3dv_cl_ensure_space_with_branch(rcl, 200 + 1 * 256 *
cl_packet_length(SUPERTILE_COORDINATES));
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
config.early_z_disable = true;
config.image_width_pixels = 1;
config.image_height_pixels = 1;
config.number_of_render_targets = 1;
config.multisample_mode_4x = false;
config.maximum_bpp_of_all_render_targets = V3D_INTERNAL_BPP_32;
}
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
rt.render_target_0_internal_bpp = V3D_INTERNAL_BPP_32;
rt.render_target_0_internal_type = V3D_INTERNAL_TYPE_8;
rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
}
cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
clear.z_clear_value = 1.0f;
clear.stencil_clear_value = 0;
};
cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
init.use_auto_chained_tile_lists = true;
init.size_of_first_block_in_chained_tile_lists =
TILE_ALLOCATION_BLOCK_SIZE_64B;
}
cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
list.address = v3dv_cl_address(job->tile_alloc, 0);
}
cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
config.number_of_bin_tile_lists = 1;
config.total_frame_width_in_tiles = 1;
config.total_frame_height_in_tiles = 1;
config.supertile_width_in_tiles = 1;
config.supertile_height_in_tiles = 1;
config.total_frame_width_in_supertiles = 1;
config.total_frame_height_in_supertiles = 1;
}
struct v3dv_cl *icl = &job->indirect;
v3dv_cl_ensure_space(icl, 200, 1);
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(icl);
cl_emit(icl, TILE_COORDINATES_IMPLICIT, coords);
cl_emit(icl, END_OF_LOADS, end);
cl_emit(icl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
cl_emit(icl, STORE_TILE_BUFFER_GENERAL, store) {
store.buffer_to_store = NONE;
}
cl_emit(icl, END_OF_TILE_MARKER, end);
cl_emit(icl, RETURN_FROM_SUB_LIST, ret);
cl_emit(rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
branch.start = tile_list_start;
branch.end = v3dv_cl_get_address(icl);
}
cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
coords.column_number_in_supertiles = 0;
coords.row_number_in_supertiles = 0;
}
cl_emit(rcl, END_OF_RENDERING, end);
}
static VkResult
queue_create_noop_job(struct v3dv_queue *queue, struct v3dv_job **job)
{
*job = vk_zalloc(&queue->device->alloc, sizeof(struct v3dv_job), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!*job)
return VK_ERROR_OUT_OF_HOST_MEMORY;
v3dv_job_init(*job, queue->device, NULL, -1);
emit_noop_bin(*job);
emit_noop_render(*job);
return VK_SUCCESS;
}
void
v3dv_queue_destroy_completed_noop_jobs(struct v3dv_queue *queue)
{
struct v3dv_device *device = queue->device;
VkDevice _device = v3dv_device_to_handle(device);
list_for_each_entry_safe(struct v3dv_job, job,
&queue->noop_jobs, list_link) {
assert(job->fence);
if (!drmSyncobjWait(device->render_fd, &job->fence->sync, 1, 0, 0, NULL)) {
v3dv_job_destroy(job);
v3dv_DestroyFence(_device, v3dv_fence_to_handle(job->fence), NULL);
}
}
}
static VkResult
queue_submit_noop_job(struct v3dv_queue *queue, const VkSubmitInfo *pSubmit)
{
VkResult result;
bool can_destroy_job = true;
struct v3dv_device *device = queue->device;
VkDevice _device = v3dv_device_to_handle(device);
/* Create noop job */
struct v3dv_job *job;
result = queue_create_noop_job(queue, &job);
if (result != VK_SUCCESS)
goto fail_job;
/* Create a fence for the job */
VkFence _fence;
VkFenceCreateInfo fence_info = {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.pNext = NULL,
.flags = 0
};
result = v3dv_CreateFence(_device, &fence_info, NULL, &_fence);
if (result != VK_SUCCESS)
goto fail_fence;
/* Submit the job */
result = queue_submit_job(queue, job, pSubmit->waitSemaphoreCount > 0);
if (result != VK_SUCCESS)
goto fail_submit;
list_addtail(&job->list_link, &queue->noop_jobs);
/* At this point we have submitted the job for execution and we can no
* longer destroy it until we know it has completed execution on the GPU.
*/
can_destroy_job = false;
/* Bind a fence to the job we have just submitted so we can poll if the job
* has completed.
*/
if (process_fence_to_signal(device, _fence) != VK_SUCCESS) {
/* If we could not bind the fence, then we need to do a sync wait so
* we don't leak the job. If the sync wait also fails, then we are
* out of options.
*/
int ret = drmSyncobjWait(device->render_fd,
&device->last_job_sync, 1, INT64_MAX, 0, NULL);
if (!ret)
can_destroy_job = true;
else
result = VK_ERROR_DEVICE_LOST;
goto fail_signal_fence;
}
job->fence = v3dv_fence_from_handle(_fence);
return result;
fail_signal_fence:
fail_submit:
v3dv_DestroyFence(_device, _fence, NULL);
fail_fence:
if (can_destroy_job)
v3dv_job_destroy(job);
fail_job:
return result;
}
static VkResult
queue_submit_cmd_buffer_batch(struct v3dv_queue *queue,
const VkSubmitInfo *pSubmit,
VkFence fence)
{
VkResult result = VK_SUCCESS;
for (uint32_t i = 0; i < pSubmit->commandBufferCount; i++) {
struct v3dv_cmd_buffer *cmd_buffer =
v3dv_cmd_buffer_from_handle(pSubmit->pCommandBuffers[i]);
result = queue_submit_cmd_buffer(cmd_buffer, pSubmit);
if (result != VK_SUCCESS)
return result;
/* Even if we don't have any actual work to submit, we still need to
 * wait on the wait semaphores and signal the signal semaphores and
 * fence. In that scenario we just submit a trivial no-op job so we
 * don't have to do anything special; it should not be a common case
 * anyway.
 */
if (pSubmit->commandBufferCount == 0) {
result = queue_submit_noop_job(queue, pSubmit);
} else {
for (uint32_t i = 0; i < pSubmit->commandBufferCount; i++) {
struct v3dv_cmd_buffer *cmd_buffer =
v3dv_cmd_buffer_from_handle(pSubmit->pCommandBuffers[i]);
result = queue_submit_cmd_buffer(queue, cmd_buffer, pSubmit);
if (result != VK_SUCCESS)
break;
}
}
if (result != VK_SUCCESS)
return result;
result = process_semaphores_to_signal(queue->device,
pSubmit->signalSemaphoreCount,
pSubmit->pSignalSemaphores);
@ -246,6 +450,8 @@ v3dv_QueueSubmit(VkQueue _queue,
{
V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);
v3dv_queue_destroy_completed_noop_jobs(queue);
VkResult result = VK_SUCCESS;
for (uint32_t i = 0; i < submitCount; i++) {
result = queue_submit_cmd_buffer_batch(queue, &pSubmits[i], fence);