radv: move pipe_misaligned and l2_coherent image checks to flags set on init

This should save 4-5% CPU in some cases.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11462>
This commit is contained in:
Mike Blumenkrantz 2021-06-18 09:08:40 -04:00 committed by Marge Bot
parent e1af22d9dd
commit 651c6b16ff
3 changed files with 77 additions and 75 deletions

View File

@@ -3225,85 +3225,12 @@ radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags src_st
}
}
/* Determine if the image is affected by the pipe misaligned metadata issue
 * which requires to invalidate L2.
 *
 * Returns true as soon as one plane of the image is found to be affected and
 * false when none is. Must only be called on GFX10 and newer (asserted).
 *
 * Every intermediate quantity is deliberately a signed int: the MAX2(0, ...)
 * clamps below only do something if the subtraction can really go negative.
 * With unsigned operands the difference wraps around, the comparison inside
 * MAX2 is done in unsigned arithmetic, the clamp never triggers, and images
 * end up being reported as misaligned when they are not.
 */
static bool
radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
{
   struct radeon_info *rad_info = &device->physical_device->rad_info;
   int log2_samples = util_logbase2(image->info.samples);

   assert(rad_info->chip_class >= GFX10);

   for (unsigned i = 0; i < image->plane_count; ++i) {
      VkFormat fmt = vk_format_get_plane_format(image->vk_format, i);
      int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
      int log2_bpp_and_samples;

      if (rad_info->chip_class >= GFX10_3) {
         log2_bpp_and_samples = log2_bpp + log2_samples;
      } else {
         /* Before GFX10.3, depth images with at least 8 layers are treated
          * as if log2(bpp) were 2, and the sum is capped at 6.
          */
         if (vk_format_has_depth(image->vk_format) && image->info.array_size >= 8) {
            log2_bpp = 2;
         }

         log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
      }

      int num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
      /* Signed on purpose: may be negative before the clamp. */
      int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);

      if (vk_format_has_depth(image->vk_format)) {
         if (radv_image_is_tc_compat_htile(image) && overlap) {
            return true;
         }
      } else {
         int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
         /* Signed on purpose: may be negative before the clamp. */
         int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
         int samples_overlap = MIN2(log2_samples, overlap);

         /* TODO: It shouldn't be necessary if the image has DCC but
          * not readable by shader.
          */
         if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
             (samples_overlap > log2_samples_frag_diff)) {
            return true;
         }
      }
   }

   return false;
}
static bool
radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
{
if (device->physical_device->rad_info.chip_class >= GFX10) {
return !device->physical_device->rad_info.tcc_rb_non_coherent &&
(image && !radv_image_is_pipe_misaligned(device, image));
} else if (device->physical_device->rad_info.chip_class == GFX9 && image) {
if (image->info.samples == 1 &&
(image->usage &
(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
!vk_format_has_stencil(image->vk_format)) {
/* Single-sample color and single-sample depth
* (not stencil) are coherent with shaders on
* GFX9.
*/
return true;
}
}
return false;
}
enum radv_cmd_flush_bits
radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags src_flags,
const struct radv_image *image)
{
bool has_CB_meta = true, has_DB_meta = true;
bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image);
bool image_is_coherent = image ? image->l2_coherent : false;
enum radv_cmd_flush_bits flush_bits = 0;
if (image) {
@@ -3379,7 +3306,7 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags dst_flag
bool has_CB_meta = true, has_DB_meta = true;
enum radv_cmd_flush_bits flush_bits = 0;
bool flush_CB = true, flush_DB = true;
bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image);
bool image_is_coherent = image ? image->l2_coherent : false;
if (image) {
if (!(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {

View File

@@ -1527,6 +1527,79 @@ radv_select_modifier(const struct radv_device *dev, VkFormat format,
unreachable("App specified an invalid modifier");
}
/* Determine if the image is affected by the pipe misaligned metadata issue
 * which requires to invalidate L2.
 *
 * Returns true as soon as one plane of the image is found to be affected and
 * false when none is. Must only be called on GFX10 and newer (asserted).
 *
 * Every intermediate quantity is deliberately a signed int: the MAX2(0, ...)
 * clamps below only do something if the subtraction can really go negative.
 * With unsigned operands the difference wraps around, the comparison inside
 * MAX2 is done in unsigned arithmetic, the clamp never triggers, and images
 * end up being reported as misaligned when they are not.
 */
static bool
radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
{
   struct radeon_info *rad_info = &device->physical_device->rad_info;
   int log2_samples = util_logbase2(image->info.samples);

   assert(rad_info->chip_class >= GFX10);

   for (unsigned i = 0; i < image->plane_count; ++i) {
      VkFormat fmt = vk_format_get_plane_format(image->vk_format, i);
      int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
      int log2_bpp_and_samples;

      if (rad_info->chip_class >= GFX10_3) {
         log2_bpp_and_samples = log2_bpp + log2_samples;
      } else {
         /* Before GFX10.3, depth images with at least 8 layers are treated
          * as if log2(bpp) were 2, and the sum is capped at 6.
          */
         if (vk_format_has_depth(image->vk_format) && image->info.array_size >= 8) {
            log2_bpp = 2;
         }

         log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
      }

      int num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
      /* Signed on purpose: may be negative before the clamp. */
      int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);

      if (vk_format_has_depth(image->vk_format)) {
         if (radv_image_is_tc_compat_htile(image) && overlap) {
            return true;
         }
      } else {
         int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
         /* Signed on purpose: may be negative before the clamp. */
         int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
         int samples_overlap = MIN2(log2_samples, overlap);

         /* TODO: It shouldn't be necessary if the image has DCC but
          * not readable by shader.
          */
         if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
             (samples_overlap > log2_samples_frag_diff)) {
            return true;
         }
      }
   }

   return false;
}
static bool
radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
{
if (device->physical_device->rad_info.chip_class >= GFX10) {
return !device->physical_device->rad_info.tcc_rb_non_coherent &&
!radv_image_is_pipe_misaligned(device, image);
} else if (device->physical_device->rad_info.chip_class == GFX9) {
if (image->info.samples == 1 &&
(image->usage &
(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
!vk_format_has_stencil(image->vk_format)) {
/* Single-sample color and single-sample depth
* (not stencil) are coherent with shaders on
* GFX9.
*/
return true;
}
}
return false;
}
VkResult
radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info,
const VkAllocationCallbacks *alloc, VkImage *pImage)
@@ -1634,6 +1707,7 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_
return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
}
image->l2_coherent = radv_image_is_l2_coherent(device, image);
if (device->instance->debug_flags & RADV_DEBUG_IMG) {
radv_image_print_info(device, image);

View File

@@ -1856,6 +1856,7 @@ struct radv_image {
unsigned queue_family_mask;
bool exclusive;
bool shareable;
bool l2_coherent;
/* Set when bound */
struct radeon_winsys_bo *bo;