radeonsi: Put retile map in separate buffers.

The retile maps are a software mechanism and hence very susceptible
to change. As such I'd like to avoid making them part of the
cross-driver ABI.

Ideally we'd just use the cached tile info + a shader to avoid these
buffers altogether.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6783>
This commit is contained in:
Bas Nieuwenhuizen 2020-07-30 17:14:38 +02:00 committed by Marek Olšák
parent be48cf804b
commit c6c1fa9a26
5 changed files with 69 additions and 74 deletions

View File

@ -1642,34 +1642,32 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
/* Align the size to 4 (for the compute shader). */
surf->u.gfx9.dcc_retile_num_elements = align(surf->u.gfx9.dcc_retile_num_elements, 4);
if (!(surf->flags & RADEON_SURF_IMPORTED)) {
/* Compute address mapping from non-displayable to displayable DCC. */
ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin;
memset(&addrin, 0, sizeof(addrin));
addrin.size = sizeof(addrin);
addrin.swizzleMode = din.swizzleMode;
addrin.resourceType = din.resourceType;
addrin.bpp = din.bpp;
addrin.numSlices = 1;
addrin.numMipLevels = 1;
addrin.numFrags = 1;
addrin.pitch = dout.pitch;
addrin.height = dout.height;
addrin.compressBlkWidth = dout.compressBlkWidth;
addrin.compressBlkHeight = dout.compressBlkHeight;
addrin.compressBlkDepth = dout.compressBlkDepth;
addrin.metaBlkWidth = dout.metaBlkWidth;
addrin.metaBlkHeight = dout.metaBlkHeight;
addrin.metaBlkDepth = dout.metaBlkDepth;
addrin.dccRamSliceSize = 0; /* Don't care for non-layered images. */
/* Compute address mapping from non-displayable to displayable DCC. */
ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin;
memset(&addrin, 0, sizeof(addrin));
addrin.size = sizeof(addrin);
addrin.swizzleMode = din.swizzleMode;
addrin.resourceType = din.resourceType;
addrin.bpp = din.bpp;
addrin.numSlices = 1;
addrin.numMipLevels = 1;
addrin.numFrags = 1;
addrin.pitch = dout.pitch;
addrin.height = dout.height;
addrin.compressBlkWidth = dout.compressBlkWidth;
addrin.compressBlkHeight = dout.compressBlkHeight;
addrin.compressBlkDepth = dout.compressBlkDepth;
addrin.metaBlkWidth = dout.metaBlkWidth;
addrin.metaBlkHeight = dout.metaBlkHeight;
addrin.metaBlkDepth = dout.metaBlkDepth;
addrin.dccRamSliceSize = 0; /* Don't care for non-layered images. */
surf->u.gfx9.dcc_retile_map = ac_compute_dcc_retile_map(
addrlib, info, retile_dim[0], retile_dim[1], surf->u.gfx9.dcc.rb_aligned,
surf->u.gfx9.dcc.pipe_aligned, surf->u.gfx9.dcc_retile_use_uint16,
surf->u.gfx9.dcc_retile_num_elements, &addrin);
if (!surf->u.gfx9.dcc_retile_map)
return ADDR_OUTOFMEMORY;
}
surf->u.gfx9.dcc_retile_map = ac_compute_dcc_retile_map(
addrlib, info, retile_dim[0], retile_dim[1], surf->u.gfx9.dcc.rb_aligned,
surf->u.gfx9.dcc.pipe_aligned, surf->u.gfx9.dcc_retile_use_uint16,
surf->u.gfx9.dcc_retile_num_elements, &addrin);
if (!surf->u.gfx9.dcc_retile_map)
return ADDR_OUTOFMEMORY;
}
}
@ -2102,6 +2100,11 @@ int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *inf
surf->total_size = surf->surf_size;
surf->alignment = surf->surf_alignment;
/* Ensure the offsets are always 0 if not available. */
surf->dcc_offset = surf->display_dcc_offset = 0;
surf->fmask_offset = surf->cmask_offset = 0;
surf->htile_offset = 0;
if (surf->htile_size) {
surf->htile_offset = align64(surf->total_size, surf->htile_alignment);
surf->total_size = surf->htile_offset + surf->htile_size;
@ -2135,17 +2138,6 @@ int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *inf
/* Add space for the displayable DCC buffer. */
surf->display_dcc_offset = align64(surf->total_size, surf->u.gfx9.display_dcc_alignment);
surf->total_size = surf->display_dcc_offset + surf->u.gfx9.display_dcc_size;
/* Add space for the DCC retile buffer. (16-bit or 32-bit elements) */
surf->dcc_retile_map_offset = align64(surf->total_size, info->tcc_cache_line_size);
if (surf->u.gfx9.dcc_retile_use_uint16) {
surf->total_size =
surf->dcc_retile_map_offset + surf->u.gfx9.dcc_retile_num_elements * 2;
} else {
surf->total_size =
surf->dcc_retile_map_offset + surf->u.gfx9.dcc_retile_num_elements * 4;
}
}
surf->dcc_offset = align64(surf->total_size, surf->dcc_alignment);
@ -2161,7 +2153,6 @@ void ac_surface_zero_dcc_fields(struct radeon_surf *surf)
{
surf->dcc_offset = 0;
surf->display_dcc_offset = 0;
surf->dcc_retile_map_offset = 0;
}
static unsigned eg_tile_split(unsigned tile_split)
@ -2493,6 +2484,4 @@ void ac_surface_override_offset_stride(const struct radeon_info *info, struct ra
surf->dcc_offset += offset;
if (surf->display_dcc_offset)
surf->display_dcc_offset += offset;
if (surf->dcc_retile_map_offset)
surf->dcc_retile_map_offset += offset;
}

View File

@ -249,7 +249,6 @@ struct radeon_surf {
uint64_t cmask_offset;
uint64_t dcc_offset;
uint64_t display_dcc_offset;
uint64_t dcc_retile_map_offset;
uint64_t total_size;
uint32_t alignment;

View File

@ -626,18 +626,18 @@ void si_retile_dcc(struct si_context *sctx, struct si_texture *tex)
unsigned num_elements = tex->surface.u.gfx9.dcc_retile_num_elements;
struct pipe_image_view img[3];
assert(tex->surface.dcc_retile_map_offset && tex->surface.dcc_retile_map_offset <= UINT_MAX);
assert(tex->dcc_retile_buffer);
assert(tex->surface.dcc_offset && tex->surface.dcc_offset <= UINT_MAX);
assert(tex->surface.display_dcc_offset && tex->surface.display_dcc_offset <= UINT_MAX);
for (unsigned i = 0; i < 3; i++) {
img[i].resource = &tex->buffer.b.b;
img[i].resource = i == 0 ? &tex->dcc_retile_buffer->b.b : &tex->buffer.b.b;
img[i].access = i == 2 ? PIPE_IMAGE_ACCESS_WRITE : PIPE_IMAGE_ACCESS_READ;
img[i].shader_access = SI_IMAGE_ACCESS_AS_BUFFER;
}
img[0].format = use_uint16 ? PIPE_FORMAT_R16G16B16A16_UINT : PIPE_FORMAT_R32G32B32A32_UINT;
img[0].u.buf.offset = tex->surface.dcc_retile_map_offset;
img[0].u.buf.offset = 0;
img[0].u.buf.size = num_elements * (use_uint16 ? 2 : 4);
img[1].format = PIPE_FORMAT_R8_UINT;

View File

@ -384,6 +384,8 @@ struct si_texture {
unsigned ps_draw_ratio;
/* The number of clears since the last DCC usage analysis. */
unsigned num_slow_clears;
struct si_resource *dcc_retile_buffer;
};
struct si_surface {

View File

@ -534,6 +534,7 @@ static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_tex
tex->dcc_gather_statistics = new_tex->dcc_gather_statistics;
si_resource_reference(&tex->dcc_separate_buffer, new_tex->dcc_separate_buffer);
si_resource_reference(&tex->last_dcc_separate_buffer, new_tex->last_dcc_separate_buffer);
si_resource_reference(&tex->dcc_retile_buffer, new_tex->dcc_retile_buffer);
if (new_bind_flag == PIPE_BIND_LINEAR) {
assert(!tex->surface.htile_offset);
@ -813,6 +814,7 @@ static void si_texture_destroy(struct pipe_screen *screen, struct pipe_resource
pb_reference(&resource->buf, NULL);
si_resource_reference(&tex->dcc_separate_buffer, NULL);
si_resource_reference(&tex->last_dcc_separate_buffer, NULL);
si_resource_reference(&tex->dcc_retile_buffer, NULL);
FREE(tex);
}
@ -1145,41 +1147,44 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
}
}
}
}
/* Initialize displayable DCC that requires the retile blit. */
if (tex->surface.dcc_retile_map_offset) {
/* Uninitialized DCC can hang the display hw.
* Clear to white to indicate that. */
si_screen_clear_buffer(sscreen, &tex->buffer.b.b, tex->surface.display_dcc_offset,
tex->surface.u.gfx9.display_dcc_size, DCC_CLEAR_COLOR_1111);
/* Initialize displayable DCC that requires the retile blit. */
if (tex->surface.display_dcc_offset) {
/* Uninitialized DCC can hang the display hw.
* Clear to white to indicate that. */
si_screen_clear_buffer(sscreen, &tex->buffer.b.b, tex->surface.display_dcc_offset,
tex->surface.u.gfx9.display_dcc_size, DCC_CLEAR_COLOR_1111);
/* Upload the DCC retile map.
* Use a staging buffer for the upload, because
* the buffer backing the texture is unmappable.
*/
bool use_uint16 = tex->surface.u.gfx9.dcc_retile_use_uint16;
unsigned num_elements = tex->surface.u.gfx9.dcc_retile_num_elements;
unsigned dcc_retile_map_size = num_elements * (use_uint16 ? 2 : 4);
struct si_resource *buf = si_aligned_buffer_create(screen, 0, PIPE_USAGE_STREAM,
dcc_retile_map_size,
sscreen->info.tcc_cache_line_size);
void *map = sscreen->ws->buffer_map(buf->buf, NULL, PIPE_TRANSFER_WRITE);
/* Upload the DCC retile map.
* Use a staging buffer for the upload, because
* the buffer backing the texture is unmappable.
*/
bool use_uint16 = tex->surface.u.gfx9.dcc_retile_use_uint16;
unsigned num_elements = tex->surface.u.gfx9.dcc_retile_num_elements;
unsigned dcc_retile_map_size = num_elements * (use_uint16 ? 2 : 4);
/* Upload the retile map into the staging buffer. */
memcpy(map, tex->surface.u.gfx9.dcc_retile_map, dcc_retile_map_size);
tex->dcc_retile_buffer = si_aligned_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
dcc_retile_map_size,
sscreen->info.tcc_cache_line_size);
struct si_resource *buf = si_aligned_buffer_create(screen, 0, PIPE_USAGE_STREAM,
dcc_retile_map_size,
sscreen->info.tcc_cache_line_size);
void *map = sscreen->ws->buffer_map(buf->buf, NULL, PIPE_TRANSFER_WRITE);
/* Copy the staging buffer to the buffer backing the texture. */
struct si_context *sctx = (struct si_context *)sscreen->aux_context;
/* Upload the retile map into the staging buffer. */
memcpy(map, tex->surface.u.gfx9.dcc_retile_map, dcc_retile_map_size);
assert(tex->surface.dcc_retile_map_offset <= UINT_MAX);
simple_mtx_lock(&sscreen->aux_context_lock);
si_sdma_copy_buffer(sctx, &tex->buffer.b.b, &buf->b.b, tex->surface.dcc_retile_map_offset,
0, buf->b.b.width0);
sscreen->aux_context->flush(sscreen->aux_context, NULL, 0);
simple_mtx_unlock(&sscreen->aux_context_lock);
/* Copy the staging buffer to the buffer backing the texture. */
struct si_context *sctx = (struct si_context *)sscreen->aux_context;
si_resource_reference(&buf, NULL);
}
simple_mtx_lock(&sscreen->aux_context_lock);
si_sdma_copy_buffer(sctx, &tex->dcc_retile_buffer->b.b, &buf->b.b, 0,
0, buf->b.b.width0);
sscreen->aux_context->flush(sscreen->aux_context, NULL, 0);
simple_mtx_unlock(&sscreen->aux_context_lock);
si_resource_reference(&buf, NULL);
}
/* Initialize the CMASK base register value. */