From 899263ecfc8251ffff375d70b4c5d44f192112b0 Mon Sep 17 00:00:00 2001
From: Erik Kurzinger
Date: Tue, 23 Jan 2024 18:55:12 -0800
Subject: [PATCH] wsi/x11: support explicit sync

This adds support to the X11 WSI for explicit synchronization using
DRM syncobjs. It relies on version 1.4 of the DRI3 and Present
extensions.

Signed-off-by: Erik Kurzinger
---
 meson.build                         |   8 +
 src/vulkan/wsi/wsi_common.c         |   8 +
 src/vulkan/wsi/wsi_common.h         |   3 +
 src/vulkan/wsi/wsi_common_wayland.c |   3 +-
 src/vulkan/wsi/wsi_common_x11.c     | 223 +++++++++++++++++++++-------
 5 files changed, 191 insertions(+), 54 deletions(-)

diff --git a/meson.build b/meson.build
index c99b98f542e9d..3af6595ea3a09 100644
--- a/meson.build
+++ b/meson.build
@@ -2066,6 +2066,7 @@ if host_machine.cpu_family() == 'x86_64' and cc.get_id() == 'gcc'
 endif
 
 with_dri3_modifiers = false
+with_dri3_explicit_sync = false
 with_xcb_keysyms = false
 if with_platform_x11
   if with_glx == 'xlib'
@@ -2109,6 +2110,10 @@ if with_platform_x11
           dep_xcb_present.version().version_compare('>= 1.13'))
         with_dri3_modifiers = true
       endif
+      if (dep_xcb_dri3.version().version_compare('>= 1.17') and
+          dep_xcb_present.version().version_compare('>= 1.17'))
+        with_dri3_explicit_sync = true
+      endif
       dep_xcb_shm = dependency('xcb-shm')
       dep_xcb_sync = dependency('xcb-sync')
       dep_xshmfence = dependency('xshmfence', version : '>= 1.1')
@@ -2151,6 +2156,9 @@ endif
 if with_dri3_modifiers
   pre_args += '-DHAVE_DRI3_MODIFIERS'
 endif
+if with_dri3_explicit_sync
+  pre_args += '-DHAVE_DRI3_EXPLICIT_SYNC'
+endif
 if with_gallium_drisw_kms
   pre_args += '-DHAVE_DRISW_KMS'
 endif
diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c
index 024c20f153082..2b7d8717e1918 100644
--- a/src/vulkan/wsi/wsi_common.c
+++ b/src/vulkan/wsi/wsi_common.c
@@ -2236,3 +2236,11 @@ wsi_caps_get_image_usage(void)
           VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
           VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
 }
+
+bool
+wsi_device_supports_explicit_sync(struct wsi_device *device)
+{
+   return !device->sw && device->has_timeline_semaphore &&
+      (device->timeline_semaphore_export_handle_types &
+       VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
+}
diff --git a/src/vulkan/wsi/wsi_common.h b/src/vulkan/wsi/wsi_common.h
index e7ff9701f837e..014cb718b1786 100644
--- a/src/vulkan/wsi/wsi_common.h
+++ b/src/vulkan/wsi/wsi_common.h
@@ -356,6 +356,9 @@ wsi_common_vk_instance_supports_present_wait(const struct vk_instance *instance)
 VkImageUsageFlags
 wsi_caps_get_image_usage(void);
 
+bool
+wsi_device_supports_explicit_sync(struct wsi_device *device);
+
 #define wsi_common_vk_warn_once(warning) \
    do { \
       static int warned = false; \
diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c
index 4583f9efad34f..9d7750ba00945 100644
--- a/src/vulkan/wsi/wsi_common_wayland.c
+++ b/src/vulkan/wsi/wsi_common_wayland.c
@@ -208,8 +208,7 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(wsi_wl_swapchain, base.base, VkSwapchainKHR,
 static bool
 wsi_wl_use_explicit_sync(struct wsi_wl_display *display, struct wsi_device *device)
 {
-   return !device->sw && device->has_timeline_semaphore &&
-      (device->timeline_semaphore_export_handle_types & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) &&
+   return wsi_device_supports_explicit_sync(device) &&
       display->wl_syncobj != NULL;
 }
 
diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c
index f6492d7afb43d..54640f9d04fc2 100644
--- a/src/vulkan/wsi/wsi_common_x11.c
+++ b/src/vulkan/wsi/wsi_common_x11.c
@@ -78,6 +78,7 @@
 struct wsi_x11_connection {
    bool has_dri3;
    bool has_dri3_modifiers;
+   bool has_dri3_explicit_sync;
    bool has_present;
    bool is_proprietary_x11;
    bool is_xwayland;
@@ -233,6 +234,8 @@ wsi_x11_connection_create(struct wsi_device *wsi_dev,
       wsi_dev->has_import_memory_host;
    bool has_dri3_v1_2 = false;
    bool has_present_v1_2 = false;
+   bool has_dri3_v1_4 = false;
+   bool has_present_v1_4 = false;
 
    struct wsi_x11_connection *wsi_conn =
       vk_alloc(&wsi_dev->instance_alloc, sizeof(*wsi_conn), 8,
@@ -292,10 +295,12 @@ wsi_x11_connection_create(struct wsi_device *wsi_dev,
       xcb_dri3_query_version_cookie_t ver_cookie;
       xcb_dri3_query_version_reply_t *ver_reply;
 
-      ver_cookie = xcb_dri3_query_version(conn, 1, 2);
+      ver_cookie = xcb_dri3_query_version(conn, 1, 4);
       ver_reply = xcb_dri3_query_version_reply(conn, ver_cookie, NULL);
       has_dri3_v1_2 = ver_reply != NULL &&
          (ver_reply->major_version > 1 || ver_reply->minor_version >= 2);
+      has_dri3_v1_4 = ver_reply != NULL &&
+         (ver_reply->major_version > 1 || ver_reply->minor_version >= 4);
       free(ver_reply);
    }
 #endif
@@ -306,10 +311,12 @@ wsi_x11_connection_create(struct wsi_device *wsi_dev,
       xcb_present_query_version_cookie_t ver_cookie;
       xcb_present_query_version_reply_t *ver_reply;
 
-      ver_cookie = xcb_present_query_version(conn, 1, 2);
+      ver_cookie = xcb_present_query_version(conn, 1, 4);
       ver_reply = xcb_present_query_version_reply(conn, ver_cookie, NULL);
       has_present_v1_2 =
          (ver_reply->major_version > 1 || ver_reply->minor_version >= 2);
+      has_present_v1_4 =
+         (ver_reply->major_version > 1 || ver_reply->minor_version >= 4);
       free(ver_reply);
    }
 #endif
@@ -329,6 +336,7 @@ wsi_x11_connection_create(struct wsi_device *wsi_dev,
                                            xwl_reply);
 
    wsi_conn->has_dri3_modifiers = has_dri3_v1_2 && has_present_v1_2;
+   wsi_conn->has_dri3_explicit_sync = has_dri3_v1_4 && has_present_v1_4;
    wsi_conn->is_proprietary_x11 = false;
    if (amd_reply && amd_reply->present)
       wsi_conn->is_proprietary_x11 = true;
@@ -1063,6 +1071,9 @@ struct x11_image {
 #define X11_SWAPCHAIN_MAX_PENDING_COMPLETIONS 16
    uint32_t present_queued_count;
    struct x11_image_pending_completion pending_completions[X11_SWAPCHAIN_MAX_PENDING_COMPLETIONS];
+#ifdef HAVE_DRI3_EXPLICIT_SYNC
+   uint32_t dri3_syncobj[WSI_ES_COUNT];
+#endif
 };
 
 struct x11_swapchain {
@@ -1220,6 +1231,25 @@ x11_get_wsi_image(struct wsi_swapchain *wsi_chain, uint32_t image_index)
 static bool
 wsi_x11_swapchain_query_dri3_modifiers_changed(struct x11_swapchain *chain);
+static VkResult
+x11_wait_for_explicit_sync_release_submission(struct x11_swapchain *chain,
+                                              uint64_t rel_timeout_ns,
+                                              uint32_t *image_index)
+{
+   STACK_ARRAY(struct wsi_image*, images, chain->base.image_count);
+   for (uint32_t i = 0; i < chain->base.image_count; i++)
+      images[i] = &chain->images[i].base;
+
+   VkResult result =
+      wsi_drm_wait_for_explicit_sync_release(&chain->base,
+                                             chain->base.image_count,
+                                             images,
+                                             rel_timeout_ns,
+                                             image_index);
+   STACK_ARRAY_FINISH(images);
+   return result;
+}
+
 /* XXX this belongs in presentproto */
 #ifndef PresentWindowDestroyed
 #define PresentWindowDestroyed (1 << 0)
 #endif
@@ -1250,6 +1280,7 @@ x11_handle_dri3_present_event(struct x11_swapchain *chain,
    case XCB_PRESENT_EVENT_IDLE_NOTIFY: {
       xcb_present_idle_notify_event_t *idle = (void *) event;
 
+      assert(!chain->base.image_info.explicit_sync);
      for (unsigned i = 0; i < chain->base.image_count; i++) {
         if (chain->images[i].pixmap == idle->pixmap) {
            chain->sent_image_count--;
@@ -1362,8 +1393,10 @@ x11_present_to_x11_dri3(struct x11_swapchain *chain, uint32_t image_index,
 
    xshmfence_reset(image->shm_fence);
 
-   ++chain->sent_image_count;
-   assert(chain->sent_image_count <= chain->base.image_count);
+   if (!chain->base.image_info.explicit_sync) {
+      ++chain->sent_image_count;
+      assert(chain->sent_image_count <= chain->base.image_count);
+   }
 
    ++chain->send_sbc;
    uint32_t serial = (uint32_t)chain->send_sbc;
@@ -1375,22 +1408,48 @@ x11_present_to_x11_dri3(struct x11_swapchain *chain, uint32_t image_index,
       .serial = serial,
    };
 
-   xcb_void_cookie_t cookie =
-      xcb_present_pixmap(chain->conn,
-                         chain->window,
-                         image->pixmap,
-                         serial,
-                         0,                                  /* valid */
-                         image->update_area,                 /* update */
-                         0,                                  /* x_off */
-                         0,                                  /* y_off */
-                         XCB_NONE,                           /* target_crtc */
-                         XCB_NONE,
-                         image->sync_fence,
-                         options,
-                         target_msc,
-                         divisor,
-                         remainder, 0, NULL);
+   xcb_void_cookie_t cookie;
+#ifdef HAVE_DRI3_EXPLICIT_SYNC
+   if (chain->base.image_info.explicit_sync) {
+      uint64_t acquire_point = image->base.explicit_sync[WSI_ES_ACQUIRE].timeline;
+      uint64_t release_point = image->base.explicit_sync[WSI_ES_RELEASE].timeline;
+      cookie = xcb_present_pixmap_synced(
+         chain->conn,
+         chain->window,
+         image->pixmap,
+         serial,
+         0,                                      /* valid */
+         image->update_area,                     /* update */
+         0,                                      /* x_off */
+         0,                                      /* y_off */
+         XCB_NONE,                               /* target_crtc */
+         image->dri3_syncobj[WSI_ES_ACQUIRE],    /* acquire_syncobj */
+         image->dri3_syncobj[WSI_ES_RELEASE],    /* release_syncobj */
+         acquire_point,
+         release_point,
+         options,
+         target_msc,
+         divisor,
+         remainder, 0, NULL);
+   } else
+#endif
+   {
+      cookie = xcb_present_pixmap(chain->conn,
+                                  chain->window,
+                                  image->pixmap,
+                                  serial,
+                                  0,                                  /* valid */
+                                  image->update_area,                 /* update */
+                                  0,                                  /* x_off */
+                                  0,                                  /* y_off */
+                                  XCB_NONE,                           /* target_crtc */
+                                  XCB_NONE,
+                                  image->sync_fence,
+                                  options,
+                                  target_msc,
+                                  divisor,
+                                  remainder, 0, NULL);
+   }
    xcb_discard_reply(chain->conn, cookie.sequence);
    xcb_flush(chain->conn);
    return x11_swapchain_result(chain, VK_SUCCESS);
@@ -1403,6 +1462,7 @@ static VkResult
 x11_present_to_x11_sw(struct x11_swapchain *chain, uint32_t image_index,
                       uint64_t target_msc)
 {
+   assert(!chain->base.image_info.explicit_sync);
    struct x11_image *image = &chain->images[image_index];
 
    /* Begin querying this before submitting the frame for improved async performance.
@@ -1622,10 +1682,13 @@ x11_release_images(struct wsi_swapchain *wsi_chain,
    if (chain->status == VK_ERROR_SURFACE_LOST_KHR)
       return chain->status;
 
-   for (uint32_t i = 0; i < count; i++) {
-      uint32_t index = indices[i];
-      assert(index < chain->base.image_count);
-      wsi_queue_push(&chain->acquire_queue, index);
+   /* If we're using implicit sync, push images to the acquire queue */
+   if (!chain->base.image_info.explicit_sync) {
+      for (uint32_t i = 0; i < count; i++) {
+         uint32_t index = indices[i];
+         assert(index < chain->base.image_count);
+         wsi_queue_push(&chain->acquire_queue, index);
+      }
    }
 
    return VK_SUCCESS;
@@ -1658,8 +1721,13 @@ x11_acquire_next_image(struct wsi_swapchain *anv_chain,
       if (result < 0)
         return result;
 
-      result = wsi_queue_pull(&chain->acquire_queue,
-                              image_index, timeout);
+      if (chain->base.image_info.explicit_sync) {
+         result = x11_wait_for_explicit_sync_release_submission(chain, timeout,
+                                                                image_index);
+      } else {
+         result = wsi_queue_pull(&chain->acquire_queue,
+                                 image_index, timeout);
+      }
 
      if (result == VK_TIMEOUT)
        return info->timeout ? VK_TIMEOUT : VK_NOT_READY;
@@ -1676,7 +1744,8 @@ x11_acquire_next_image(struct wsi_swapchain *anv_chain,
       return result;
 
    assert(*image_index < chain->base.image_count);
-   if (chain->images[*image_index].shm_fence)
+   if (chain->images[*image_index].shm_fence &&
+       !chain->base.image_info.explicit_sync)
       xshmfence_await(chain->images[*image_index].shm_fence);
 
    return result;
@@ -1782,7 +1851,7 @@ x11_manage_event_queue(void *state)
       }
    }
 
-   if (!assume_forward_progress) {
+   if (!assume_forward_progress && !chain->base.image_info.explicit_sync) {
       /* If true, application expects acquire (IDLE) to happen in finite time. */
       assume_forward_progress = x11_driver_owned_images(chain) <
                                 forward_progress_guaranteed_acquired_images;
@@ -1867,7 +1936,9 @@ x11_manage_present_queue(void *state)
       VkPresentModeKHR present_mode = chain->images[image_index].present_mode;
 
       if (x11_needs_wait_for_fences(chain->base.wsi, wsi_conn,
-                                    present_mode)) {
+                                    present_mode) &&
+          /* not necessary with explicit sync */
+          !chain->base.image_info.explicit_sync) {
         MESA_TRACE_SCOPE("wait fence");
         result = chain->base.wsi->WaitForFences(chain->base.device, 1,
                                                 &chain->base.fences[image_index],
@@ -1920,7 +1991,8 @@ x11_manage_present_queue(void *state)
    pthread_mutex_lock(&chain->thread_state_lock);
    x11_swapchain_result(chain, result);
-   wsi_queue_push(&chain->acquire_queue, UINT32_MAX);
+   if (!chain->base.image_info.explicit_sync)
+      wsi_queue_push(&chain->acquire_queue, UINT32_MAX);
    pthread_mutex_unlock(&chain->thread_state_lock);
 
    return NULL;
 }
@@ -2056,6 +2128,27 @@ x11_image_init(VkDevice device_h, struct x11_swapchain *chain,
         goto fail_image;
    }
 
+#ifdef HAVE_DRI3_EXPLICIT_SYNC
+   if (chain->base.image_info.explicit_sync) {
+      for (uint32_t i = 0; i < WSI_ES_COUNT; i++) {
+         image->dri3_syncobj[i] = xcb_generate_id(chain->conn);
+         int fd = dup(image->base.explicit_sync[i].fd);
+         if (fd < 0)
+            goto fail_image;
+
+         cookie = xcb_dri3_import_syncobj_checked(chain->conn,
+                                                  image->dri3_syncobj[i],
+                                                  chain->window,
+                                                  fd /* libxcb closes the fd */);
+         error = xcb_request_check(chain->conn, cookie);
+         if (error != NULL) {
+            free(error);
+            goto fail_image;
+         }
+      }
+   }
+#endif
+
 out_fence:
    fence_fd = xshmfence_alloc_shm();
    if (fence_fd < 0)
@@ -2106,6 +2199,15 @@ x11_image_finish(struct x11_swapchain *chain,
       cookie = xcb_xfixes_destroy_region(chain->conn, image->update_region);
       xcb_discard_reply(chain->conn, cookie.sequence);
+
+#ifdef HAVE_DRI3_EXPLICIT_SYNC
+      if (chain->base.image_info.explicit_sync) {
+         for (uint32_t i = 0; i < WSI_ES_COUNT; i++) {
+            cookie = xcb_dri3_free_syncobj(chain->conn, image->dri3_syncobj[i]);
+            xcb_discard_reply(chain->conn, cookie.sequence);
+         }
+      }
+#endif
    }
 
    wsi_destroy_image(&chain->base, &image->base);
@@ -2231,6 +2333,7 @@ wsi_x11_swapchain_query_dri3_modifiers_changed(struct x11_swapchain *chain)
    drm_image_params = (struct wsi_drm_image_params){
       .base.image_type = WSI_IMAGE_TYPE_DRM,
       .same_gpu = wsi_x11_check_dri3_compatible(wsi_device, chain->conn),
+      .explicit_sync = chain->base.image_info.explicit_sync,
    };
 
    wsi_x11_get_dri3_modifiers(wsi_conn, chain->conn, chain->window, bit_depth, 32,
@@ -2267,7 +2370,8 @@ x11_swapchain_destroy(struct wsi_swapchain *anv_chain,
    pthread_join(chain->queue_manager, NULL);
    pthread_join(chain->event_manager, NULL);
 
-   wsi_queue_destroy(&chain->acquire_queue);
+   if (!chain->base.image_info.explicit_sync)
+      wsi_queue_destroy(&chain->acquire_queue);
    wsi_queue_destroy(&chain->present_queue);
 
    for (uint32_t i = 0; i < chain->base.image_count; i++)
@@ -2465,6 +2569,16 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
       return VK_ERROR_OUT_OF_HOST_MEMORY;
    }
 
+   uint32_t present_caps = 0;
+   xcb_present_query_capabilities_cookie_t present_query_cookie;
+   xcb_present_query_capabilities_reply_t *present_query_reply;
+   present_query_cookie = xcb_present_query_capabilities(conn, window);
+   present_query_reply = xcb_present_query_capabilities_reply(conn, present_query_cookie, NULL);
+   if (present_query_reply) {
+      present_caps = present_query_reply->capabilities;
+      free(present_query_reply);
+   }
+
    struct wsi_base_image_params *image_params = NULL;
    struct wsi_cpu_image_params cpu_image_params;
    struct wsi_drm_image_params drm_image_params;
@@ -2480,6 +2594,14 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
       drm_image_params = (struct wsi_drm_image_params) {
         .base.image_type = WSI_IMAGE_TYPE_DRM,
         .same_gpu = wsi_x11_check_dri3_compatible(wsi_device, conn),
+        .explicit_sync =
+#ifdef HAVE_DRI3_EXPLICIT_SYNC
+           wsi_conn->has_dri3_explicit_sync &&
+           (present_caps & XCB_PRESENT_CAPABILITY_SYNCOBJ) &&
+           wsi_device_supports_explicit_sync(wsi_device),
+#else
+           false,
+#endif
      };
      if (wsi_device->supports_modifiers) {
         wsi_x11_get_dri3_modifiers(wsi_conn, conn, window, bit_depth, 32,
@@ -2522,15 +2644,7 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
    chain->status = VK_SUCCESS;
    chain->has_dri3_modifiers = wsi_conn->has_dri3_modifiers;
    chain->has_mit_shm = wsi_conn->has_mit_shm;
-
-   xcb_present_query_capabilities_cookie_t present_query_cookie;
-   xcb_present_query_capabilities_reply_t *present_query_reply;
-   present_query_cookie = xcb_present_query_capabilities(conn, chain->window);
-   present_query_reply = xcb_present_query_capabilities_reply(conn, present_query_cookie, NULL);
-   if (present_query_reply) {
-      chain->has_async_may_tear = present_query_reply->capabilities & XCB_PRESENT_CAPABILITY_ASYNC_MAY_TEAR;
-      free(present_query_reply);
-   }
+   chain->has_async_may_tear = present_caps & XCB_PRESENT_CAPABILITY_ASYNC_MAY_TEAR;
 
    /* When images in the swapchain don't fit the window, X can still present them, but it won't
    * happen by flip, only by copy. So this is a suboptimal copy, because if the client would change
@@ -2568,10 +2682,11 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
    * server and can be reused.
    */
   chain->event_id = xcb_generate_id(chain->conn);
-   xcb_present_select_input(chain->conn, chain->event_id, chain->window,
-                            XCB_PRESENT_EVENT_MASK_CONFIGURE_NOTIFY |
-                            XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY |
-                            XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY);
+   uint32_t event_mask = XCB_PRESENT_EVENT_MASK_CONFIGURE_NOTIFY |
+                         XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY;
+   if (!chain->base.image_info.explicit_sync)
+      event_mask |= XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY;
+   xcb_present_select_input(chain->conn, chain->event_id, chain->window, event_mask);
 
    /* Create an XCB event queue to hold present events outside of the usual
     * application event queue
@@ -2612,14 +2727,17 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
       goto fail_init_images;
    }
 
-   ret = wsi_queue_init(&chain->acquire_queue, chain->base.image_count + 1);
-   if (ret) {
-      wsi_queue_destroy(&chain->present_queue);
-      goto fail_init_images;
-   }
+   /* Acquire queue is only needed when using implicit sync */
+   if (!chain->base.image_info.explicit_sync) {
+      ret = wsi_queue_init(&chain->acquire_queue, chain->base.image_count + 1);
+      if (ret) {
+         wsi_queue_destroy(&chain->present_queue);
+         goto fail_init_images;
+      }
 
-   for (unsigned i = 0; i < chain->base.image_count; i++)
-      wsi_queue_push(&chain->acquire_queue, i);
+      for (unsigned i = 0; i < chain->base.image_count; i++)
+         wsi_queue_push(&chain->acquire_queue, i);
+   }
 
    ret = pthread_create(&chain->queue_manager, NULL,
                         x11_manage_present_queue, chain);
@@ -2648,7 +2766,8 @@ fail_init_event_queue:
 
 fail_init_fifo_queue:
    wsi_queue_destroy(&chain->present_queue);
-   wsi_queue_destroy(&chain->acquire_queue);
+   if (!chain->base.image_info.explicit_sync)
+      wsi_queue_destroy(&chain->acquire_queue);
 
 fail_init_images:
    for (uint32_t j = 0; j < image; j++)
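
A note on how the new path is gated: the swapchain only enables explicit
sync when three independent checks pass. The Vulkan device must pass
wsi_device_supports_explicit_sync(), i.e. a hardware (non-sw) device whose
timeline semaphores can be exported as opaque fds (those become the DRM
syncobjs handed to the server); the build must have HAVE_DRI3_EXPLICIT_SYNC
(xcb-dri3 and xcb-present >= 1.17); and the running server must offer
DRI3 >= 1.4, Present >= 1.4, and the per-window
XCB_PRESENT_CAPABILITY_SYNCOBJ bit. The following is a minimal standalone
sketch of the server-side checks only; it is illustrative, not part of the
patch, and the file name and build line are hypothetical. It assumes
libxcb >= 1.17 headers linked against xcb, xcb-dri3 and xcb-present:

/* probe_explicit_sync.c - report whether an X server advertises what the
 * explicit sync path in this patch requires.  Sketch only; build with e.g.
 *   cc probe_explicit_sync.c $(pkg-config --cflags --libs xcb xcb-dri3 xcb-present)
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <xcb/xcb.h>
#include <xcb/dri3.h>
#include <xcb/present.h>

int main(void)
{
   xcb_connection_t *conn = xcb_connect(NULL, NULL);
   if (xcb_connection_has_error(conn))
      return 1;

   bool dri3_v1_4 = false, present_v1_4 = false;
   uint32_t caps = 0;

   /* Same version negotiation wsi_x11_connection_create() performs. */
   const xcb_query_extension_reply_t *dri3 =
      xcb_get_extension_data(conn, &xcb_dri3_id);
   if (dri3 && dri3->present) {
      xcb_dri3_query_version_reply_t *ver = xcb_dri3_query_version_reply(
         conn, xcb_dri3_query_version(conn, 1, 4), NULL);
      dri3_v1_4 = ver && (ver->major_version > 1 || ver->minor_version >= 4);
      free(ver);
   }

   const xcb_query_extension_reply_t *present =
      xcb_get_extension_data(conn, &xcb_present_id);
   if (present && present->present) {
      xcb_present_query_version_reply_t *ver = xcb_present_query_version_reply(
         conn, xcb_present_query_version(conn, 1, 4), NULL);
      present_v1_4 = ver && (ver->major_version > 1 || ver->minor_version >= 4);
      free(ver);

      /* The patch queries capabilities for the swapchain's window; the
       * root window of the first screen stands in for it here. */
      xcb_window_t root = xcb_setup_roots_iterator(xcb_get_setup(conn)).data->root;
      xcb_present_query_capabilities_reply_t *cap_reply =
         xcb_present_query_capabilities_reply(
            conn, xcb_present_query_capabilities(conn, root), NULL);
      if (cap_reply) {
         caps = cap_reply->capabilities;
         free(cap_reply);
      }
   }

   printf("DRI3 >= 1.4: %d, Present >= 1.4: %d, syncobj capability: %d\n",
          dri3_v1_4, present_v1_4, !!(caps & XCB_PRESENT_CAPABILITY_SYNCOBJ));
   xcb_disconnect(conn);
   return 0;
}

The separate capability bit matters because a server can implement
Present 1.4 as a protocol while the driver behind a particular window still
cannot consume syncobjs. That is why the patch checks
XCB_PRESENT_CAPABILITY_SYNCOBJ for the swapchain's target window rather than
trusting the extension version alone, and otherwise falls back to the
existing xcb_present_pixmap()/IDLE_NOTIFY path.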