radv: optimize VRS when no depth stencil attachment is bound

This is allowed by the Vulkan spec and we have to handle this situation
internally. We used to create and bind a 4096x4096 image to copy the
VRS rates but this wasted too much VRAM (~33MiB). Now, the driver only
allocates a HTILE buffer (~1MiB) and bind it to the framebuffer.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12243>
This commit is contained in:
Samuel Pitoiset 2021-08-06 16:26:57 +02:00 committed by Marge Bot
parent ab635b024b
commit 1402c17e4f
3 changed files with 64 additions and 21 deletions

View File

@ -2413,8 +2413,8 @@ radv_cmd_buffer_get_vrs_image(struct radv_cmd_buffer *cmd_buffer)
if (!device->vrs.image) {
VkResult result;
/* The global VRS image is created on-demand to avoid wasting space */
result = radv_device_init_vrs_image(device);
/* The global VRS state is initialized on-demand to avoid wasting VRAM. */
result = radv_device_init_vrs_state(device);
if (result != VK_SUCCESS) {
cmd_buffer->record_result = result;
return NULL;
@ -2483,6 +2483,7 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
* bind our internal depth buffer that contains the VRS data as part of HTILE.
*/
VkImageLayout layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
struct radv_buffer *htile_buffer = cmd_buffer->device->vrs.buffer;
struct radv_image *image = cmd_buffer->device->vrs.image;
struct radv_ds_buffer_info ds;
struct radv_image_view iview;
@ -2504,9 +2505,9 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
},
NULL);
radv_initialise_ds_surface(cmd_buffer->device, &ds, &iview);
radv_initialise_vrs_surface(image, htile_buffer, &ds);
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, image->bo);
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, htile_buffer->bo);
radv_emit_fb_ds_state(cmd_buffer, &ds, &iview, layout, false);
} else {
@ -5167,11 +5168,7 @@ radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer, uint32_t subpa
if (ds_image) {
/* HTILE buffer */
uint64_t htile_offset = ds_image->offset + ds_image->planes[0].surface.meta_offset;
uint64_t htile_size = ds_image->planes[0].surface.meta_slice_size;
struct radv_buffer htile_buffer = {.bo = ds_image->bo,
.offset = htile_offset,
.size = htile_size};
struct radv_buffer *htile_buffer = cmd_buffer->device->vrs.buffer;
VkExtent2D extent = {
.width = MIN2(fb->width, ds_image->info.width),
@ -5179,7 +5176,7 @@ radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer, uint32_t subpa
};
/* Copy the VRS rates to the HTILE buffer. */
radv_copy_vrs_htile(cmd_buffer, vrs_iview->image, &extent, ds_image, &htile_buffer, false);
radv_copy_vrs_htile(cmd_buffer, vrs_iview->image, &extent, ds_image, htile_buffer, false);
}
}
}

View File

@ -2837,13 +2837,14 @@ radv_device_finish_border_color(struct radv_device *device)
}
VkResult
radv_device_init_vrs_image(struct radv_device *device)
radv_device_init_vrs_state(struct radv_device *device)
{
/* FIXME: 4k depth buffers should be large enough for now but we might want to adjust this
* dynamically at some point. Also, it's probably better to use S8_UINT but no HTILE support yet.
* dynamically at some point.
*/
uint32_t width = 4096, height = 4096;
VkDeviceMemory mem;
VkBuffer buffer;
VkResult result;
VkImage image;
@ -2868,14 +2869,26 @@ radv_device_init_vrs_image(struct radv_device *device)
if (result != VK_SUCCESS)
return result;
VkImageMemoryRequirementsInfo2 info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
.image = image,
VkBufferCreateInfo buffer_create_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = radv_image_from_handle(image)->planes[0].surface.meta_size,
.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
};
result = radv_CreateBuffer(radv_device_to_handle(device), &buffer_create_info,
&device->meta_state.alloc, &buffer);
if (result != VK_SUCCESS)
goto fail_create;
VkBufferMemoryRequirementsInfo2 info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
.buffer = buffer,
};
VkMemoryRequirements2 mem_req = {
.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
};
radv_GetImageMemoryRequirements2(radv_device_to_handle(device), &info, &mem_req);
radv_GetBufferMemoryRequirements2(radv_device_to_handle(device), &info, &mem_req);
VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
@ -2887,18 +2900,19 @@ radv_device_init_vrs_image(struct radv_device *device)
if (result != VK_SUCCESS)
goto fail_alloc;
VkBindImageMemoryInfo bind_info = {
.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
.image = image,
VkBindBufferMemoryInfo bind_info = {
.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
.buffer = buffer,
.memory = mem,
.memoryOffset = 0
};
result = radv_BindImageMemory2(radv_device_to_handle(device), 1, &bind_info);
result = radv_BindBufferMemory2(radv_device_to_handle(device), 1, &bind_info);
if (result != VK_SUCCESS)
goto fail_bind;
device->vrs.image = radv_image_from_handle(image);
device->vrs.buffer = radv_buffer_from_handle(buffer);
device->vrs.mem = radv_device_memory_from_handle(mem);
return VK_SUCCESS;
@ -2906,6 +2920,8 @@ radv_device_init_vrs_image(struct radv_device *device)
fail_bind:
radv_FreeMemory(radv_device_to_handle(device), mem, &device->meta_state.alloc);
fail_alloc:
radv_DestroyBuffer(radv_device_to_handle(device), buffer, &device->meta_state.alloc);
fail_create:
radv_DestroyImage(radv_device_to_handle(device), image, &device->meta_state.alloc);
return result;
@ -2916,6 +2932,8 @@ radv_device_finish_vrs_image(struct radv_device *device)
{
radv_FreeMemory(radv_device_to_handle(device), radv_device_memory_to_handle(device->vrs.mem),
&device->meta_state.alloc);
radv_DestroyBuffer(radv_device_to_handle(device), radv_buffer_to_handle(device->vrs.buffer),
&device->meta_state.alloc);
radv_DestroyImage(radv_device_to_handle(device), radv_image_to_handle(device->vrs.image),
&device->meta_state.alloc);
}
@ -6851,6 +6869,31 @@ radv_calc_decompress_on_z_planes(struct radv_device *device, struct radv_image_v
return max_zplanes;
}
void
radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
struct radv_ds_buffer_info *ds)
{
const struct radeon_surf *surf = &image->planes[0].surface;
assert(image->vk_format == VK_FORMAT_D16_UNORM);
memset(ds, 0, sizeof(*ds));
ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
ds->db_z_info = S_028038_FORMAT(V_028040_Z_16) |
S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
S_028038_ZRANGE_PRECISION(1) |
S_028038_TILE_SURFACE_ENABLE(1);
ds->db_stencil_info = S_02803C_FORMAT(V_028044_STENCIL_INVALID);
ds->db_depth_size = S_02801C_X_MAX(image->info.width - 1) |
S_02801C_Y_MAX(image->info.height - 1);
ds->db_htile_data_base = radv_buffer_get_va(htile_buffer->bo) >> 8;
ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1) |
S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
}
void
radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,
struct radv_image_view *iview)

View File

@ -852,6 +852,7 @@ struct radv_device {
/* Depth image for VRS when not bound by the app. */
struct {
struct radv_image *image;
struct radv_buffer *buffer; /* HTILE */
struct radv_device_memory *mem;
} vrs;
};
@ -1287,6 +1288,8 @@ void radv_initialise_color_surface(struct radv_device *device, struct radv_color
struct radv_image_view *iview);
void radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,
struct radv_image_view *iview);
void radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
struct radv_ds_buffer_info *ds);
/**
* Attachment state when recording a renderpass instance.
@ -1561,7 +1564,7 @@ void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples);
unsigned radv_get_default_max_sample_dist(int log_samples);
void radv_device_init_msaa(struct radv_device *device);
VkResult radv_device_init_vrs_image(struct radv_device *device);
VkResult radv_device_init_vrs_state(struct radv_device *device);
void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
const struct radv_image_view *iview,