turnip: Request no implicit sync when we have no implicit-sync WSI BOs.
I chose to implement this as a global flag in the device, because otherwise we would end up with extra draw overhead trying to avoid implicit sync in the implicit-sync WSI case, and we would probably end up needing implicit sync anyway because one of those BOs was used in one of the submitted cmdbufs. To do better than this, we would probably want a skip-implicit-sync flag on each BO in the BO list, rather than a global flag on the submit. Reports about venus on turnip say that this flag reduces worst-case QueueSubmit time in a game workload from ~10ms to ~4ms. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14838>
This commit is contained in:
parent
83ee08f6d1
commit
59bc17d57a
|
@ -2143,6 +2143,20 @@ tu_AllocateMemory(VkDevice _device,
|
|||
return result;
|
||||
}
|
||||
|
||||
/* Track in the device whether our BO list contains any implicit-sync BOs, so
|
||||
* we can suppress implicit sync on non-WSI usage.
|
||||
*/
|
||||
const struct wsi_memory_allocate_info *wsi_info =
|
||||
vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
|
||||
if (wsi_info && wsi_info->implicit_sync) {
|
||||
mtx_lock(&device->bo_mutex);
|
||||
if (!mem->bo->implicit_sync) {
|
||||
mem->bo->implicit_sync = true;
|
||||
device->implicit_sync_bo_count++;
|
||||
}
|
||||
mtx_unlock(&device->bo_mutex);
|
||||
}
|
||||
|
||||
*pMem = tu_device_memory_to_handle(mem);
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
|
|
@ -372,6 +372,9 @@ tu_bo_finish(struct tu_device *dev, struct tu_bo *bo)
|
|||
struct tu_bo* exchanging_bo = tu_device_lookup_bo(dev, dev->bo_list[bo->bo_list_idx].handle);
|
||||
exchanging_bo->bo_list_idx = bo->bo_list_idx;
|
||||
|
||||
if (bo->implicit_sync)
|
||||
dev->implicit_sync_bo_count--;
|
||||
|
||||
mtx_unlock(&dev->bo_mutex);
|
||||
|
||||
/* Our BO structs are stored in a sparse array in the physical device,
|
||||
|
@ -954,6 +957,9 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
|
|||
|
||||
mtx_lock(&queue->device->bo_mutex);
|
||||
|
||||
if (queue->device->implicit_sync_bo_count == 0)
|
||||
flags |= MSM_SUBMIT_NO_IMPLICIT;
|
||||
|
||||
/* drm_msm_gem_submit_cmd requires index of bo which could change at any
|
||||
* time when bo_mutex is not locked. So we build the submit cmds here, at the real
|
||||
* place to submit.
|
||||
|
|
|
@ -362,6 +362,8 @@ struct tu_bo
|
|||
int32_t refcnt;
|
||||
uint32_t bo_list_idx;
|
||||
#endif
|
||||
|
||||
bool implicit_sync : 1;
|
||||
};
|
||||
|
||||
enum global_shader {
|
||||
|
@ -440,6 +442,8 @@ struct tu_device
|
|||
|
||||
struct tu_bo *global_bo;
|
||||
|
||||
uint32_t implicit_sync_bo_count;
|
||||
|
||||
/* the blob seems to always use 8K factor and 128K param sizes, copy them */
|
||||
#define TU_TESS_FACTOR_SIZE (8 * 1024)
|
||||
#define TU_TESS_PARAM_SIZE (128 * 1024)
|
||||
|
|
Loading…
Reference in New Issue