anv: Add driconf option to disable compression for 16bpp format

On Fallout4, enabling HIZ_CCS_WT compression for the D16_UNORM format
regresses performance by 2%. To avoid that, disable the compression
via a driconf option.

Experiments showed that Fallout4 performs better with HIZ than with
HIZ_CCS or HIZ_CCS_WT. The reason is that the benchmark runs a depth
pass with D16_UNORM surfaces, which fills the L3 cache; the next pass
doesn't make use of that data, so we end up clearing the cache.

v2:
- Don't add conditional check in isl (Nanley, Jason)
- Move disable_d16unorm_compression flag to instance (Lionel)
- Use plane_format.isl_format (Nanley)

v3:
- Add more descriptive comment (Marcin Ślusarz)

Signed-off-by: Sagar Ghuge <sagar.ghuge@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6734>
This commit is contained in:
Sagar Ghuge 2020-09-14 15:52:11 -07:00 committed by Marge Bot
parent 49593205b9
commit bcfec61d1e
5 changed files with 31 additions and 1 deletions

View File

@ -53,6 +53,7 @@ static const driOptionDescription anv_dri_options[] = {
DRI_CONF_SECTION_PERFORMANCE
DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
DRI_CONF_DISABLE_D16UNORM_COMPRESSION(false)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_DEBUG
@ -781,6 +782,8 @@ VkResult anv_CreateInstance(
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
anv_init_dri_options(instance);
instance->disable_d16unorm_compression =
driQueryOptionb(&instance->dri_options, "disable_d16unorm_compression");
*pInstance = anv_instance_to_handle(instance);

View File

@ -420,7 +420,20 @@ add_aux_surface_if_supported(struct anv_device *device,
* TODO: This is a heuristic trade-off; we haven't tuned it at all.
*/
assert(device->info.gen >= 12);
image->planes[plane].aux_usage = ISL_AUX_USAGE_HIZ_CCS_WT;
/* Experiments showed that the benchmark performs better with HIZ than
* with HIZ_CCS or HIZ_CCS_WT. The benchmark runs a depth pass with
* D16_UNORM surfaces, which fills the L3 cache; the next pass doesn't
* make use of that data, so we end up clearing the cache, which
* results in a performance regression.
*
* To avoid the perf regression, disable HIZ_CCS_WT compression for the
* D16_UNORM surface format on Fallout4 via a driconf option.
*/
if (plane_format.isl_format == ISL_FORMAT_R16_UNORM &&
device->physical->instance->disable_d16unorm_compression)
image->planes[plane].aux_usage = ISL_AUX_USAGE_HIZ;
else
image->planes[plane].aux_usage = ISL_AUX_USAGE_HIZ_CCS_WT;
} else {
assert(device->info.gen >= 12);
image->planes[plane].aux_usage = ISL_AUX_USAGE_HIZ_CCS;

View File

@ -1171,6 +1171,7 @@ struct anv_instance {
struct driOptionCache dri_options;
struct driOptionCache available_dri_options;
bool disable_d16unorm_compression;
};
VkResult anv_init_wsi(struct anv_physical_device *physical_device);

View File

@ -733,6 +733,11 @@ TODO: document the other workarounds.
<option name="vs_position_always_invariant" value="true" />
</application>
</device>
<device driver="anv">
<!-- Fallout 4: HIZ_CCS_WT compression of D16_UNORM depth surfaces was
     measured to regress performance by about 2%, so turn it off for
     this title. -->
<application name="Fallout 4" executable="Fallout4.exe">
<option name="disable_d16unorm_compression" value="true" />
</application>
</device>
<device driver="radv">
<application name="Shadow Of The Tomb Raider" executable="ShadowOfTheTombRaider">
<option name="radv_report_llvm9_version_string" value="true" />

View File

@ -451,4 +451,12 @@
DRI_CONF_OPT_I(radv_override_uniform_offset_alignment, def, 0, 128, \
"Override the minUniformBufferOffsetAlignment exposed to the application. (0 = default)")
/**
* \brief ANV specific configuration options
*/
/* Declares the boolean driconf option "disable_d16unorm_compression",
 * described to users as disabling HIZ_CCS_WT compression for D16_UNORM
 * surfaces. `def` is forwarded to DRI_CONF_OPT_B -- presumably the
 * option's default value; confirm against DRI_CONF_OPT_B.
 */
#define DRI_CONF_DISABLE_D16UNORM_COMPRESSION(def) \
DRI_CONF_OPT_B(disable_d16unorm_compression, def, \
"Disable HIZ_CCS_WT compression for D16_UNORM surface format")
#endif