radeonsi: Put retile map in separate buffers.

The retile maps are a software mechanism and hence very susceptible
to change. As such I'd like to avoid making them part of the
cross-driver ABI.

Ideally we'd just use the cached tile info + a shader to avoid these
buffers altogether.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6783>
This commit is contained in:
Bas Nieuwenhuizen 2020-07-30 17:14:38 +02:00 committed by Marek Olšák
parent be48cf804b
commit c6c1fa9a26
5 changed files with 69 additions and 74 deletions

View File

@ -1642,7 +1642,6 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
/* Align the size to 4 (for the compute shader). */
surf->u.gfx9.dcc_retile_num_elements = align(surf->u.gfx9.dcc_retile_num_elements, 4);
if (!(surf->flags & RADEON_SURF_IMPORTED)) {
/* Compute address mapping from non-displayable to displayable DCC. */
ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin;
memset(&addrin, 0, sizeof(addrin));
@ -1671,7 +1670,6 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
return ADDR_OUTOFMEMORY;
}
}
}
/* FMASK */
if (in->numSamples > 1 && info->has_graphics && !(surf->flags & RADEON_SURF_NO_FMASK)) {
@ -2102,6 +2100,11 @@ int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *inf
surf->total_size = surf->surf_size;
surf->alignment = surf->surf_alignment;
/* Ensure the offsets are always 0 if not available. */
surf->dcc_offset = surf->display_dcc_offset = 0;
surf->fmask_offset = surf->cmask_offset = 0;
surf->htile_offset = 0;
if (surf->htile_size) {
surf->htile_offset = align64(surf->total_size, surf->htile_alignment);
surf->total_size = surf->htile_offset + surf->htile_size;
@ -2135,17 +2138,6 @@ int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *inf
/* Add space for the displayable DCC buffer. */
surf->display_dcc_offset = align64(surf->total_size, surf->u.gfx9.display_dcc_alignment);
surf->total_size = surf->display_dcc_offset + surf->u.gfx9.display_dcc_size;
/* Add space for the DCC retile buffer. (16-bit or 32-bit elements) */
surf->dcc_retile_map_offset = align64(surf->total_size, info->tcc_cache_line_size);
if (surf->u.gfx9.dcc_retile_use_uint16) {
surf->total_size =
surf->dcc_retile_map_offset + surf->u.gfx9.dcc_retile_num_elements * 2;
} else {
surf->total_size =
surf->dcc_retile_map_offset + surf->u.gfx9.dcc_retile_num_elements * 4;
}
}
surf->dcc_offset = align64(surf->total_size, surf->dcc_alignment);
@ -2161,7 +2153,6 @@ void ac_surface_zero_dcc_fields(struct radeon_surf *surf)
{
surf->dcc_offset = 0;
surf->display_dcc_offset = 0;
surf->dcc_retile_map_offset = 0;
}
static unsigned eg_tile_split(unsigned tile_split)
@ -2493,6 +2484,4 @@ void ac_surface_override_offset_stride(const struct radeon_info *info, struct ra
surf->dcc_offset += offset;
if (surf->display_dcc_offset)
surf->display_dcc_offset += offset;
if (surf->dcc_retile_map_offset)
surf->dcc_retile_map_offset += offset;
}

View File

@ -249,7 +249,6 @@ struct radeon_surf {
uint64_t cmask_offset;
uint64_t dcc_offset;
uint64_t display_dcc_offset;
uint64_t dcc_retile_map_offset;
uint64_t total_size;
uint32_t alignment;

View File

@ -626,18 +626,18 @@ void si_retile_dcc(struct si_context *sctx, struct si_texture *tex)
unsigned num_elements = tex->surface.u.gfx9.dcc_retile_num_elements;
struct pipe_image_view img[3];
assert(tex->surface.dcc_retile_map_offset && tex->surface.dcc_retile_map_offset <= UINT_MAX);
assert(tex->dcc_retile_buffer);
assert(tex->surface.dcc_offset && tex->surface.dcc_offset <= UINT_MAX);
assert(tex->surface.display_dcc_offset && tex->surface.display_dcc_offset <= UINT_MAX);
for (unsigned i = 0; i < 3; i++) {
img[i].resource = &tex->buffer.b.b;
img[i].resource = i == 0 ? &tex->dcc_retile_buffer->b.b : &tex->buffer.b.b;
img[i].access = i == 2 ? PIPE_IMAGE_ACCESS_WRITE : PIPE_IMAGE_ACCESS_READ;
img[i].shader_access = SI_IMAGE_ACCESS_AS_BUFFER;
}
img[0].format = use_uint16 ? PIPE_FORMAT_R16G16B16A16_UINT : PIPE_FORMAT_R32G32B32A32_UINT;
img[0].u.buf.offset = tex->surface.dcc_retile_map_offset;
img[0].u.buf.offset = 0;
img[0].u.buf.size = num_elements * (use_uint16 ? 2 : 4);
img[1].format = PIPE_FORMAT_R8_UINT;

View File

@ -384,6 +384,8 @@ struct si_texture {
unsigned ps_draw_ratio;
/* The number of clears since the last DCC usage analysis. */
unsigned num_slow_clears;
struct si_resource *dcc_retile_buffer;
};
struct si_surface {

View File

@ -534,6 +534,7 @@ static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_tex
tex->dcc_gather_statistics = new_tex->dcc_gather_statistics;
si_resource_reference(&tex->dcc_separate_buffer, new_tex->dcc_separate_buffer);
si_resource_reference(&tex->last_dcc_separate_buffer, new_tex->last_dcc_separate_buffer);
si_resource_reference(&tex->dcc_retile_buffer, new_tex->dcc_retile_buffer);
if (new_bind_flag == PIPE_BIND_LINEAR) {
assert(!tex->surface.htile_offset);
@ -813,6 +814,7 @@ static void si_texture_destroy(struct pipe_screen *screen, struct pipe_resource
pb_reference(&resource->buf, NULL);
si_resource_reference(&tex->dcc_separate_buffer, NULL);
si_resource_reference(&tex->last_dcc_separate_buffer, NULL);
si_resource_reference(&tex->dcc_retile_buffer, NULL);
FREE(tex);
}
@ -1145,9 +1147,10 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
}
}
}
}
/* Initialize displayable DCC that requires the retile blit. */
if (tex->surface.dcc_retile_map_offset) {
if (tex->surface.display_dcc_offset) {
/* Uninitialized DCC can hang the display hw.
* Clear to white to indicate that. */
si_screen_clear_buffer(sscreen, &tex->buffer.b.b, tex->surface.display_dcc_offset,
@ -1160,6 +1163,10 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
bool use_uint16 = tex->surface.u.gfx9.dcc_retile_use_uint16;
unsigned num_elements = tex->surface.u.gfx9.dcc_retile_num_elements;
unsigned dcc_retile_map_size = num_elements * (use_uint16 ? 2 : 4);
tex->dcc_retile_buffer = si_aligned_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
dcc_retile_map_size,
sscreen->info.tcc_cache_line_size);
struct si_resource *buf = si_aligned_buffer_create(screen, 0, PIPE_USAGE_STREAM,
dcc_retile_map_size,
sscreen->info.tcc_cache_line_size);
@ -1171,16 +1178,14 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
/* Copy the staging buffer to the buffer backing the texture. */
struct si_context *sctx = (struct si_context *)sscreen->aux_context;
assert(tex->surface.dcc_retile_map_offset <= UINT_MAX);
simple_mtx_lock(&sscreen->aux_context_lock);
si_sdma_copy_buffer(sctx, &tex->buffer.b.b, &buf->b.b, tex->surface.dcc_retile_map_offset,
si_sdma_copy_buffer(sctx, &tex->dcc_retile_buffer->b.b, &buf->b.b, 0,
0, buf->b.b.width0);
sscreen->aux_context->flush(sscreen->aux_context, NULL, 0);
simple_mtx_unlock(&sscreen->aux_context_lock);
si_resource_reference(&buf, NULL);
}
}
/* Initialize the CMASK base register value. */
tex->cmask_base_address_reg = (tex->buffer.gpu_address + tex->surface.cmask_offset) >> 8;