wsi/x11: support explicit sync

This adds support to the X11 WSI for explicit synchronization using DRM
syncobjs. It relies on version 1.4 of the DRI3 and Present extensions.

Signed-off-by: Erik Kurzinger <ekurzinger@nvidia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27226>
Author: Erik Kurzinger
Date: 2024-01-23 18:55:12 -08:00 (committed by Marge Bot)
Parent: e637c63239
Commit: 899263ecfc
5 changed files with 191 additions and 54 deletions
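For context, the new presentation flow works roughly as follows: each swapchain image's acquire and release DRM syncobjs are imported into the X server once over DRI3, and every subsequent present then carries explicit timeline points on those syncobjs instead of relying on IDLE notifications and shm fences. Below is a minimal client-side sketch of that flow (a hypothetical helper, not code from this commit; it assumes an open connection, a window, a pixmap, and dup()ed syncobj fds, with error handling elided):

#include <xcb/xcb.h>
#include <xcb/dri3.h>
#include <xcb/present.h>

/* Hypothetical helper: import an image's acquire/release syncobjs, then
 * present with explicit timeline points. Assumes DRI3 >= 1.4 and
 * Present >= 1.4 have already been negotiated with the server. */
static void
present_with_explicit_sync(xcb_connection_t *conn, xcb_window_t window,
                           xcb_pixmap_t pixmap, uint32_t serial,
                           int acquire_fd, int release_fd,
                           uint64_t acquire_point, uint64_t release_point)
{
   uint32_t acquire_syncobj = xcb_generate_id(conn);
   uint32_t release_syncobj = xcb_generate_id(conn);

   /* libxcb takes ownership of the fds and closes them */
   xcb_dri3_import_syncobj(conn, acquire_syncobj, window, acquire_fd);
   xcb_dri3_import_syncobj(conn, release_syncobj, window, release_fd);

   /* The server waits on acquire_point before reading the pixmap and
    * signals release_point once it is done with it. */
   xcb_present_pixmap_synced(conn, window, pixmap, serial,
                             0 /* valid */, 0 /* update */,
                             0 /* x_off */, 0 /* y_off */,
                             XCB_NONE /* target_crtc */,
                             acquire_syncobj, release_syncobj,
                             acquire_point, release_point,
                             0 /* options */, 0 /* target_msc */,
                             0 /* divisor */, 0 /* remainder */, 0, NULL);
   xcb_flush(conn);
}

The actual implementation below performs the import in x11_image_init() and the synced present in x11_present_to_x11_dri3().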

meson.build

@@ -2066,6 +2066,7 @@ if host_machine.cpu_family() == 'x86_64' and cc.get_id() == 'gcc'
endif
with_dri3_modifiers = false
+with_dri3_explicit_sync = false
with_xcb_keysyms = false
if with_platform_x11
if with_glx == 'xlib'
@@ -2109,6 +2110,10 @@ if with_platform_x11
dep_xcb_present.version().version_compare('>= 1.13'))
with_dri3_modifiers = true
endif
+if (dep_xcb_dri3.version().version_compare('>= 1.17') and
+dep_xcb_present.version().version_compare('>= 1.17'))
+with_dri3_explicit_sync = true
+endif
dep_xcb_shm = dependency('xcb-shm')
dep_xcb_sync = dependency('xcb-sync')
dep_xshmfence = dependency('xshmfence', version : '>= 1.1')
@@ -2151,6 +2156,9 @@ endif
if with_dri3_modifiers
pre_args += '-DHAVE_DRI3_MODIFIERS'
endif
+if with_dri3_explicit_sync
+pre_args += '-DHAVE_DRI3_EXPLICIT_SYNC'
+endif
if with_gallium_drisw_kms
pre_args += '-DHAVE_DRISW_KMS'
endif

src/vulkan/wsi/wsi_common.c

@@ -2236,3 +2236,11 @@ wsi_caps_get_image_usage(void)
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
}
+bool
+wsi_device_supports_explicit_sync(struct wsi_device *device)
+{
+return !device->sw && device->has_timeline_semaphore &&
+(device->timeline_semaphore_export_handle_types &
+VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
+}

src/vulkan/wsi/wsi_common.h

@@ -356,6 +356,9 @@ wsi_common_vk_instance_supports_present_wait(const struct vk_instance *instance)
VkImageUsageFlags
wsi_caps_get_image_usage(void);
+bool
+wsi_device_supports_explicit_sync(struct wsi_device *device);
#define wsi_common_vk_warn_once(warning) \
do { \
static int warned = false; \

src/vulkan/wsi/wsi_common_wayland.c

@@ -208,8 +208,7 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(wsi_wl_swapchain, base.base, VkSwapchainKHR,
static bool
wsi_wl_use_explicit_sync(struct wsi_wl_display *display, struct wsi_device *device)
{
-return !device->sw && device->has_timeline_semaphore &&
-(device->timeline_semaphore_export_handle_types & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) &&
+return wsi_device_supports_explicit_sync(device) &&
display->wl_syncobj != NULL;
}

src/vulkan/wsi/wsi_common_x11.c

@@ -78,6 +78,7 @@
struct wsi_x11_connection {
bool has_dri3;
bool has_dri3_modifiers;
+bool has_dri3_explicit_sync;
bool has_present;
bool is_proprietary_x11;
bool is_xwayland;
@@ -233,6 +234,8 @@ wsi_x11_connection_create(struct wsi_device *wsi_dev,
wsi_dev->has_import_memory_host;
bool has_dri3_v1_2 = false;
bool has_present_v1_2 = false;
+bool has_dri3_v1_4 = false;
+bool has_present_v1_4 = false;
struct wsi_x11_connection *wsi_conn =
vk_alloc(&wsi_dev->instance_alloc, sizeof(*wsi_conn), 8,
@@ -292,10 +295,12 @@ wsi_x11_connection_create(struct wsi_device *wsi_dev,
xcb_dri3_query_version_cookie_t ver_cookie;
xcb_dri3_query_version_reply_t *ver_reply;
-ver_cookie = xcb_dri3_query_version(conn, 1, 2);
+ver_cookie = xcb_dri3_query_version(conn, 1, 4);
ver_reply = xcb_dri3_query_version_reply(conn, ver_cookie, NULL);
has_dri3_v1_2 = ver_reply != NULL &&
(ver_reply->major_version > 1 || ver_reply->minor_version >= 2);
+has_dri3_v1_4 = ver_reply != NULL &&
+(ver_reply->major_version > 1 || ver_reply->minor_version >= 4);
free(ver_reply);
}
#endif
@@ -306,10 +311,12 @@ wsi_x11_connection_create(struct wsi_device *wsi_dev,
xcb_present_query_version_cookie_t ver_cookie;
xcb_present_query_version_reply_t *ver_reply;
-ver_cookie = xcb_present_query_version(conn, 1, 2);
+ver_cookie = xcb_present_query_version(conn, 1, 4);
ver_reply = xcb_present_query_version_reply(conn, ver_cookie, NULL);
has_present_v1_2 =
(ver_reply->major_version > 1 || ver_reply->minor_version >= 2);
+has_present_v1_4 =
+(ver_reply->major_version > 1 || ver_reply->minor_version >= 4);
free(ver_reply);
}
#endif
@@ -329,6 +336,7 @@ wsi_x11_connection_create(struct wsi_device *wsi_dev,
xwl_reply);
wsi_conn->has_dri3_modifiers = has_dri3_v1_2 && has_present_v1_2;
+wsi_conn->has_dri3_explicit_sync = has_dri3_v1_4 && has_present_v1_4;
wsi_conn->is_proprietary_x11 = false;
if (amd_reply && amd_reply->present)
wsi_conn->is_proprietary_x11 = true;
@@ -1063,6 +1071,9 @@ struct x11_image {
#define X11_SWAPCHAIN_MAX_PENDING_COMPLETIONS 16
uint32_t present_queued_count;
struct x11_image_pending_completion pending_completions[X11_SWAPCHAIN_MAX_PENDING_COMPLETIONS];
+#ifdef HAVE_DRI3_EXPLICIT_SYNC
+uint32_t dri3_syncobj[WSI_ES_COUNT];
+#endif
};
struct x11_swapchain {
@@ -1220,6 +1231,25 @@ x11_get_wsi_image(struct wsi_swapchain *wsi_chain, uint32_t image_index)
static bool
wsi_x11_swapchain_query_dri3_modifiers_changed(struct x11_swapchain *chain);
+static VkResult
+x11_wait_for_explicit_sync_release_submission(struct x11_swapchain *chain,
+uint64_t rel_timeout_ns,
+uint32_t *image_index)
+{
+STACK_ARRAY(struct wsi_image*, images, chain->base.image_count);
+for (uint32_t i = 0; i < chain->base.image_count; i++)
+images[i] = &chain->images[i].base;
+VkResult result =
+wsi_drm_wait_for_explicit_sync_release(&chain->base,
+chain->base.image_count,
+images,
+rel_timeout_ns,
+image_index);
+STACK_ARRAY_FINISH(images);
+return result;
+}
/* XXX this belongs in presentproto */
#ifndef PresentWindowDestroyed
#define PresentWindowDestroyed (1 << 0)
@@ -1250,6 +1280,7 @@ x11_handle_dri3_present_event(struct x11_swapchain *chain,
case XCB_PRESENT_EVENT_IDLE_NOTIFY: {
xcb_present_idle_notify_event_t *idle = (void *) event;
+assert(!chain->base.image_info.explicit_sync);
for (unsigned i = 0; i < chain->base.image_count; i++) {
if (chain->images[i].pixmap == idle->pixmap) {
chain->sent_image_count--;
@@ -1362,8 +1393,10 @@ x11_present_to_x11_dri3(struct x11_swapchain *chain, uint32_t image_index,
xshmfence_reset(image->shm_fence);
-++chain->sent_image_count;
-assert(chain->sent_image_count <= chain->base.image_count);
+if (!chain->base.image_info.explicit_sync) {
+++chain->sent_image_count;
+assert(chain->sent_image_count <= chain->base.image_count);
+}
++chain->send_sbc;
uint32_t serial = (uint32_t)chain->send_sbc;
@@ -1375,22 +1408,48 @@ x11_present_to_x11_dri3(struct x11_swapchain *chain, uint32_t image_index,
.serial = serial,
};
-xcb_void_cookie_t cookie =
-xcb_present_pixmap(chain->conn,
-chain->window,
-image->pixmap,
-serial,
-0, /* valid */
-image->update_area, /* update */
-0, /* x_off */
-0, /* y_off */
-XCB_NONE, /* target_crtc */
-XCB_NONE,
-image->sync_fence,
-options,
-target_msc,
-divisor,
-remainder, 0, NULL);
+xcb_void_cookie_t cookie;
+#ifdef HAVE_DRI3_EXPLICIT_SYNC
+if (chain->base.image_info.explicit_sync) {
+uint64_t acquire_point = image->base.explicit_sync[WSI_ES_ACQUIRE].timeline;
+uint64_t release_point = image->base.explicit_sync[WSI_ES_RELEASE].timeline;
+cookie = xcb_present_pixmap_synced(
+chain->conn,
+chain->window,
+image->pixmap,
+serial,
+0, /* valid */
+image->update_area, /* update */
+0, /* x_off */
+0, /* y_off */
+XCB_NONE, /* target_crtc */
+image->dri3_syncobj[WSI_ES_ACQUIRE], /* acquire_syncobj */
+image->dri3_syncobj[WSI_ES_RELEASE], /* release_syncobj */
+acquire_point,
+release_point,
+options,
+target_msc,
+divisor,
+remainder, 0, NULL);
+} else
+#endif
+{
+cookie = xcb_present_pixmap(chain->conn,
+chain->window,
+image->pixmap,
+serial,
+0, /* valid */
+image->update_area, /* update */
+0, /* x_off */
+0, /* y_off */
+XCB_NONE, /* target_crtc */
+XCB_NONE,
+image->sync_fence,
+options,
+target_msc,
+divisor,
+remainder, 0, NULL);
+}
xcb_discard_reply(chain->conn, cookie.sequence);
xcb_flush(chain->conn);
return x11_swapchain_result(chain, VK_SUCCESS);
@@ -1403,6 +1462,7 @@ static VkResult
x11_present_to_x11_sw(struct x11_swapchain *chain, uint32_t image_index,
uint64_t target_msc)
{
+assert(!chain->base.image_info.explicit_sync);
struct x11_image *image = &chain->images[image_index];
/* Begin querying this before submitting the frame for improved async performance.
@@ -1622,10 +1682,13 @@ x11_release_images(struct wsi_swapchain *wsi_chain,
if (chain->status == VK_ERROR_SURFACE_LOST_KHR)
return chain->status;
-for (uint32_t i = 0; i < count; i++) {
-uint32_t index = indices[i];
-assert(index < chain->base.image_count);
-wsi_queue_push(&chain->acquire_queue, index);
+/* If we're using implicit sync, push images to the acquire queue */
+if (!chain->base.image_info.explicit_sync) {
+for (uint32_t i = 0; i < count; i++) {
+uint32_t index = indices[i];
+assert(index < chain->base.image_count);
+wsi_queue_push(&chain->acquire_queue, index);
+}
}
return VK_SUCCESS;
@@ -1658,8 +1721,13 @@ x11_acquire_next_image(struct wsi_swapchain *anv_chain,
if (result < 0)
return result;
-result = wsi_queue_pull(&chain->acquire_queue,
-image_index, timeout);
+if (chain->base.image_info.explicit_sync) {
+result = x11_wait_for_explicit_sync_release_submission(chain, timeout,
+image_index);
+} else {
+result = wsi_queue_pull(&chain->acquire_queue,
+image_index, timeout);
+}
if (result == VK_TIMEOUT)
return info->timeout ? VK_TIMEOUT : VK_NOT_READY;
@@ -1676,7 +1744,8 @@ x11_acquire_next_image(struct wsi_swapchain *anv_chain,
return result;
assert(*image_index < chain->base.image_count);
-if (chain->images[*image_index].shm_fence)
+if (chain->images[*image_index].shm_fence &&
+!chain->base.image_info.explicit_sync)
xshmfence_await(chain->images[*image_index].shm_fence);
return result;
@@ -1782,7 +1851,7 @@ x11_manage_event_queue(void *state)
}
}
-if (!assume_forward_progress) {
+if (!assume_forward_progress && !chain->base.image_info.explicit_sync) {
/* If true, application expects acquire (IDLE) to happen in finite time. */
assume_forward_progress = x11_driver_owned_images(chain) <
forward_progress_guaranteed_acquired_images;
@@ -1867,7 +1936,9 @@ x11_manage_present_queue(void *state)
VkPresentModeKHR present_mode = chain->images[image_index].present_mode;
if (x11_needs_wait_for_fences(chain->base.wsi, wsi_conn,
-present_mode)) {
+present_mode) &&
+/* not necessary with explicit sync */
+!chain->base.image_info.explicit_sync) {
MESA_TRACE_SCOPE("wait fence");
result = chain->base.wsi->WaitForFences(chain->base.device, 1,
&chain->base.fences[image_index],
@@ -1920,7 +1991,8 @@ x11_manage_present_queue(void *state)
pthread_mutex_lock(&chain->thread_state_lock);
x11_swapchain_result(chain, result);
-wsi_queue_push(&chain->acquire_queue, UINT32_MAX);
+if (!chain->base.image_info.explicit_sync)
+wsi_queue_push(&chain->acquire_queue, UINT32_MAX);
pthread_mutex_unlock(&chain->thread_state_lock);
return NULL;
@@ -2056,6 +2128,27 @@ x11_image_init(VkDevice device_h, struct x11_swapchain *chain,
goto fail_image;
}
+#ifdef HAVE_DRI3_EXPLICIT_SYNC
+if (chain->base.image_info.explicit_sync) {
+for (uint32_t i = 0; i < WSI_ES_COUNT; i++) {
+image->dri3_syncobj[i] = xcb_generate_id(chain->conn);
+int fd = dup(image->base.explicit_sync[i].fd);
+if (fd < 0)
+goto fail_image;
+cookie = xcb_dri3_import_syncobj_checked(chain->conn,
+image->dri3_syncobj[i],
+chain->window,
+fd /* libxcb closes the fd */);
+error = xcb_request_check(chain->conn, cookie);
+if (error != NULL) {
+free(error);
+goto fail_image;
+}
+}
+}
+#endif
out_fence:
fence_fd = xshmfence_alloc_shm();
if (fence_fd < 0)
@@ -2106,6 +2199,15 @@ x11_image_finish(struct x11_swapchain *chain,
cookie = xcb_xfixes_destroy_region(chain->conn, image->update_region);
xcb_discard_reply(chain->conn, cookie.sequence);
+#ifdef HAVE_DRI3_EXPLICIT_SYNC
+if (chain->base.image_info.explicit_sync) {
+for (uint32_t i = 0; i < WSI_ES_COUNT; i++) {
+cookie = xcb_dri3_free_syncobj(chain->conn, image->dri3_syncobj[i]);
+xcb_discard_reply(chain->conn, cookie.sequence);
+}
+}
+#endif
}
wsi_destroy_image(&chain->base, &image->base);
@@ -2231,6 +2333,7 @@ wsi_x11_swapchain_query_dri3_modifiers_changed(struct x11_swapchain *chain)
drm_image_params = (struct wsi_drm_image_params){
.base.image_type = WSI_IMAGE_TYPE_DRM,
.same_gpu = wsi_x11_check_dri3_compatible(wsi_device, chain->conn),
+.explicit_sync = chain->base.image_info.explicit_sync,
};
wsi_x11_get_dri3_modifiers(wsi_conn, chain->conn, chain->window, bit_depth, 32,
@@ -2267,7 +2370,8 @@ x11_swapchain_destroy(struct wsi_swapchain *anv_chain,
pthread_join(chain->queue_manager, NULL);
pthread_join(chain->event_manager, NULL);
-wsi_queue_destroy(&chain->acquire_queue);
+if (!chain->base.image_info.explicit_sync)
+wsi_queue_destroy(&chain->acquire_queue);
wsi_queue_destroy(&chain->present_queue);
for (uint32_t i = 0; i < chain->base.image_count; i++)
@@ -2465,6 +2569,16 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
+uint32_t present_caps = 0;
+xcb_present_query_capabilities_cookie_t present_query_cookie;
+xcb_present_query_capabilities_reply_t *present_query_reply;
+present_query_cookie = xcb_present_query_capabilities(conn, window);
+present_query_reply = xcb_present_query_capabilities_reply(conn, present_query_cookie, NULL);
+if (present_query_reply) {
+present_caps = present_query_reply->capabilities;
+free(present_query_reply);
+}
struct wsi_base_image_params *image_params = NULL;
struct wsi_cpu_image_params cpu_image_params;
struct wsi_drm_image_params drm_image_params;
@@ -2480,6 +2594,14 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
drm_image_params = (struct wsi_drm_image_params) {
.base.image_type = WSI_IMAGE_TYPE_DRM,
.same_gpu = wsi_x11_check_dri3_compatible(wsi_device, conn),
+.explicit_sync =
+#ifdef HAVE_DRI3_EXPLICIT_SYNC
+wsi_conn->has_dri3_explicit_sync &&
+(present_caps & XCB_PRESENT_CAPABILITY_SYNCOBJ) &&
+wsi_device_supports_explicit_sync(wsi_device),
+#else
+false,
+#endif
};
if (wsi_device->supports_modifiers) {
wsi_x11_get_dri3_modifiers(wsi_conn, conn, window, bit_depth, 32,
@@ -2522,15 +2644,7 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
chain->status = VK_SUCCESS;
chain->has_dri3_modifiers = wsi_conn->has_dri3_modifiers;
chain->has_mit_shm = wsi_conn->has_mit_shm;
-xcb_present_query_capabilities_cookie_t present_query_cookie;
-xcb_present_query_capabilities_reply_t *present_query_reply;
-present_query_cookie = xcb_present_query_capabilities(conn, chain->window);
-present_query_reply = xcb_present_query_capabilities_reply(conn, present_query_cookie, NULL);
-if (present_query_reply) {
-chain->has_async_may_tear = present_query_reply->capabilities & XCB_PRESENT_CAPABILITY_ASYNC_MAY_TEAR;
-free(present_query_reply);
-}
+chain->has_async_may_tear = present_caps & XCB_PRESENT_CAPABILITY_ASYNC_MAY_TEAR;
/* When images in the swapchain don't fit the window, X can still present them, but it won't
* happen by flip, only by copy. So this is a suboptimal copy, because if the client would change
@@ -2568,10 +2682,11 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
* server and can be reused.
*/
chain->event_id = xcb_generate_id(chain->conn);
-xcb_present_select_input(chain->conn, chain->event_id, chain->window,
-XCB_PRESENT_EVENT_MASK_CONFIGURE_NOTIFY |
-XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY |
-XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY);
+uint32_t event_mask = XCB_PRESENT_EVENT_MASK_CONFIGURE_NOTIFY |
+XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY;
+if (!chain->base.image_info.explicit_sync)
+event_mask |= XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY;
+xcb_present_select_input(chain->conn, chain->event_id, chain->window, event_mask);
/* Create an XCB event queue to hold present events outside of the usual
* application event queue
@@ -2612,14 +2727,17 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
goto fail_init_images;
}
-ret = wsi_queue_init(&chain->acquire_queue, chain->base.image_count + 1);
-if (ret) {
-wsi_queue_destroy(&chain->present_queue);
-goto fail_init_images;
-}
+/* Acquire queue is only needed when using implicit sync */
+if (!chain->base.image_info.explicit_sync) {
+ret = wsi_queue_init(&chain->acquire_queue, chain->base.image_count + 1);
+if (ret) {
+wsi_queue_destroy(&chain->present_queue);
+goto fail_init_images;
+}
-for (unsigned i = 0; i < chain->base.image_count; i++)
-wsi_queue_push(&chain->acquire_queue, i);
+for (unsigned i = 0; i < chain->base.image_count; i++)
+wsi_queue_push(&chain->acquire_queue, i);
+}
ret = pthread_create(&chain->queue_manager, NULL,
x11_manage_present_queue, chain);
@@ -2648,7 +2766,8 @@ fail_init_event_queue:
fail_init_fifo_queue:
wsi_queue_destroy(&chain->present_queue);
-wsi_queue_destroy(&chain->acquire_queue);
+if (!chain->base.image_info.explicit_sync)
+wsi_queue_destroy(&chain->acquire_queue);
fail_init_images:
for (uint32_t j = 0; j < image; j++)