radv,vulkan: add a new x11 wsi drirc workaround for DOOM Eternal

DOOM Eternal happily creates a swapchain with 2 images for IMMEDIATE.
This fixes a 10% performance issue with RADV.

Cc: 20.1 <mesa-stable@lists.freedesktop.org>
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5704>
This commit is contained in:
Samuel Pitoiset 2020-07-01 08:06:09 +02:00 committed by Marge Bot
parent 311b9f2583
commit ab9ecb607b
5 changed files with 52 additions and 22 deletions

View File

@ -586,6 +586,7 @@ DRI_CONF_BEGIN
DRI_CONF_ADAPTIVE_SYNC("true")
DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
DRI_CONF_VK_X11_STRICT_IMAGE_COUNT("false")
DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT("false")
DRI_CONF_RADV_REPORT_LLVM9_VERSION_STRING("false")
DRI_CONF_RADV_ENABLE_MRT_OUTPUT_NAN_FIXUP("false")
DRI_CONF_RADV_NO_DYNAMIC_BOUNDS("false")

View File

@ -593,6 +593,12 @@ TODO: document the other workarounds.
<application name="DOOM" executable="DOOMx64vk.exe">
<option name="vk_x11_strict_image_count" value="true" />
</application>
<!-- DOOM Eternal happily creates a swapchain with 2 images for
IMMEDIATE. This fixes a 10% performance issue with RADV. -->
<application name="DOOMEternal" executable="DOOMEternalx64vk.exe">
<option name="vk_x11_ensure_min_image_count" value="true" />
</application>
</device>
<!-- vmwgfx doesn't like full buffer swaps and can't sync to vertical retraces.-->
<device driver="vmwgfx">

View File

@ -307,6 +307,11 @@ DRI_CONF_OPT_BEGIN_B(vk_x11_strict_image_count, def) \
DRI_CONF_DESC("Force the X11 WSI to create exactly the number of image specified by the application in VkSwapchainCreateInfoKHR::minImageCount") \
DRI_CONF_OPT_END
/* Declares the drirc option "vk_x11_ensure_min_image_count" (queried as a
 * boolean by the X11 WSI) with default value `def`. The string passed to
 * DRI_CONF_DESC is the option's description.
 */
#define DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(def) \
DRI_CONF_OPT_BEGIN_B(vk_x11_ensure_min_image_count, def) \
DRI_CONF_DESC("Force the X11 WSI to create at least the number of image specified by the driver in VkSurfaceCapabilitiesKHR::minImageCount") \
DRI_CONF_OPT_END
#define DRI_CONF_MESA_GLTHREAD(def) \
DRI_CONF_OPT_BEGIN_B(mesa_glthread, def) \
DRI_CONF_DESC("Enable offloading GL driver work to a separate thread") \

View File

@ -109,6 +109,11 @@ struct wsi_device {
* provided VkSwapchainCreateInfoKHR::minImageCount.
*/
bool strict_imageCount;
/* Ensures that at least the number of images specified by the
* driver in VkSurfaceCapabilitiesKHR::minImageCount is created.
*/
bool ensure_minImageCount;
} x11;
/* Signals the semaphore such that any wait on the semaphore will wait on

View File

@ -450,6 +450,33 @@ x11_surface_get_support(VkIcdSurfaceBase *icd_surface,
return VK_SUCCESS;
}
/* Return the minimum swapchain image count used by the X11 WSI.
 *
 * A non-zero x11.override_minImageCount on the device takes precedence;
 * otherwise the built-in default of 3 images is returned.
 */
static uint32_t
x11_get_min_image_count(struct wsi_device *wsi_device)
{
   /* For IMMEDIATE and FIFO, most games work in a pipelined manner where
    * they can produce frames at a rate of 1/MAX(CPU duration, GPU duration),
    * but the render latency is CPU duration + GPU duration.
    *
    * This means that with scanout from pageflipping we need 3 frames to run
    * at full speed:
    * 1) CPU rendering work
    * 2) GPU rendering work
    * 3) scanout
    *
    * Once we have a nonblocking acquire that returns a semaphore we can
    * merge 1 and 3. Hence the ideal implementation needs only 2 images, but
    * games cannot tell that we currently do not have an ideal implementation
    * and that hence they need to allocate 3 images. So let us do it for them.
    *
    * This is a tradeoff as it uses more memory than needed for
    * non-fullscreen and non-performance intensive applications.
    */
   uint32_t min_image_count = 3;

   /* An explicit (non-zero) override replaces the default. */
   if (wsi_device->x11.override_minImageCount != 0)
      min_image_count = wsi_device->x11.override_minImageCount;

   return min_image_count;
}
static VkResult
x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface,
struct wsi_device *wsi_device,
@ -502,31 +529,10 @@ x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface,
VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
}
/* For IMMEDIATE and FIFO, most games work in a pipelined manner where they
* can produce frames at a rate of 1/MAX(CPU duration, GPU duration), but
* the render latency is CPU duration + GPU duration.
*
* This means that with scanout from pageflipping we need 3 frames to run
* full speed:
* 1) CPU rendering work
* 2) GPU rendering work
* 3) scanout
*
* Once we have a nonblocking acquire that returns a semaphore we can merge
* 1 and 3. Hence the ideal implementation needs only 2 images, but games
* cannot tell that we currently do not have an ideal implementation and that
* hence they need to allocate 3 images. So let us do it for them.
*
* This is a tradeoff as it uses more memory than needed for non-fullscreen
* and non-performance intensive applications.
*/
caps->minImageCount = 3;
caps->minImageCount = x11_get_min_image_count(wsi_device);
/* There is no real maximum */
caps->maxImageCount = 0;
if (wsi_device->x11.override_minImageCount)
caps->minImageCount = wsi_device->x11.override_minImageCount;
caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
caps->maxImageArrayLayers = 1;
@ -1440,6 +1446,8 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
num_images = pCreateInfo->minImageCount;
else if (present_mode == VK_PRESENT_MODE_MAILBOX_KHR)
num_images = MAX2(num_images, 5);
else if (wsi_device->x11.ensure_minImageCount)
num_images = MAX2(num_images, x11_get_min_image_count(wsi_device));
xcb_connection_t *conn = x11_surface_get_connection(icd_surface);
struct wsi_x11_connection *wsi_conn =
@ -1659,6 +1667,11 @@ wsi_x11_init_wsi(struct wsi_device *wsi_device,
wsi_device->x11.strict_imageCount =
driQueryOptionb(dri_options, "vk_x11_strict_image_count");
}
if (driCheckOption(dri_options, "vk_x11_ensure_min_image_count", DRI_BOOL)) {
wsi_device->x11.ensure_minImageCount =
driQueryOptionb(dri_options, "vk_x11_ensure_min_image_count");
}
}
wsi->base.get_support = x11_surface_get_support;