turnip: Request no implicit sync when we have no implicit-sync WSI BOs.

I chose to implement this as a global flag in the device, because
otherwise we would end up with extra draw overhead trying to avoid
implicit sync in the implicit-sync WSI case, and you're probably going to
end up needing implicit sync anyway because you used one of those BOs in
one of the submitted cmdbufs.  To do better than this, we would probably
want a skip-implicit-sync flag on the BOs in the BO list, rather than a
global flag on the submit.

Reports about venus on turnip say that this flag reduces worst-case
QueueSubmit time in a game workload from ~10ms to ~4ms.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14838>
This commit is contained in:
Emma Anholt 2022-01-31 22:13:29 -08:00 committed by Marge Bot
parent 83ee08f6d1
commit 59bc17d57a
3 changed files with 24 additions and 0 deletions

View File

@ -2143,6 +2143,20 @@ tu_AllocateMemory(VkDevice _device,
return result;
}
/* Track in the device whether our BO list contains any implicit-sync BOs, so
* we can suppress implicit sync on non-WSI usage.
*/
const struct wsi_memory_allocate_info *wsi_info =
vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
if (wsi_info && wsi_info->implicit_sync) {
mtx_lock(&device->bo_mutex);
if (!mem->bo->implicit_sync) {
mem->bo->implicit_sync = true;
device->implicit_sync_bo_count++;
}
mtx_unlock(&device->bo_mutex);
}
*pMem = tu_device_memory_to_handle(mem);
return VK_SUCCESS;

View File

@ -372,6 +372,9 @@ tu_bo_finish(struct tu_device *dev, struct tu_bo *bo)
struct tu_bo* exchanging_bo = tu_device_lookup_bo(dev, dev->bo_list[bo->bo_list_idx].handle);
exchanging_bo->bo_list_idx = bo->bo_list_idx;
if (bo->implicit_sync)
dev->implicit_sync_bo_count--;
mtx_unlock(&dev->bo_mutex);
/* Our BO structs are stored in a sparse array in the physical device,
@ -954,6 +957,9 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
mtx_lock(&queue->device->bo_mutex);
if (queue->device->implicit_sync_bo_count == 0)
flags |= MSM_SUBMIT_NO_IMPLICIT;
/* drm_msm_gem_submit_cmd requires the index of a bo, which could change at
* any time when bo_mutex is not locked. So we build the submit cmds here, at
* the real place of submission.

View File

@ -362,6 +362,8 @@ struct tu_bo
int32_t refcnt;
uint32_t bo_list_idx;
#endif
bool implicit_sync : 1;
};
enum global_shader {
@ -440,6 +442,8 @@ struct tu_device
struct tu_bo *global_bo;
uint32_t implicit_sync_bo_count;
/* the blob seems to always use 8K factor and 128K param sizes, copy them */
#define TU_TESS_FACTOR_SIZE (8 * 1024)
#define TU_TESS_PARAM_SIZE (128 * 1024)