radeonsi: implement and use compute-based DCC decompression on gfx9-10
DCC_DECOMPRESS doesn't work. Instead of trying to figure out why, use a compute blit where the load is compressed and the store is uncompressed.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4761>
This commit is contained in:
parent
d3da73954a
commit
d6acdbd935
|
@ -1269,6 +1269,9 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
|
||||||
|
|
||||||
surf->u.gfx9.dcc.rb_aligned = din.dccKeyFlags.rbAligned;
|
surf->u.gfx9.dcc.rb_aligned = din.dccKeyFlags.rbAligned;
|
||||||
surf->u.gfx9.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned;
|
surf->u.gfx9.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned;
|
||||||
|
surf->u.gfx9.dcc_block_width = dout.compressBlkWidth;
|
||||||
|
surf->u.gfx9.dcc_block_height = dout.compressBlkHeight;
|
||||||
|
surf->u.gfx9.dcc_block_depth = dout.compressBlkDepth;
|
||||||
surf->dcc_size = dout.dccRamSize;
|
surf->dcc_size = dout.dccRamSize;
|
||||||
surf->dcc_alignment = dout.dccRamBaseAlign;
|
surf->dcc_alignment = dout.dccRamBaseAlign;
|
||||||
surf->num_dcc_levels = in->numMipLevels;
|
surf->num_dcc_levels = in->numMipLevels;
|
||||||
|
|
|
@ -167,6 +167,10 @@ struct gfx9_surf_layout {
|
||||||
|
|
||||||
uint64_t stencil_offset; /* separate stencil */
|
uint64_t stencil_offset; /* separate stencil */
|
||||||
|
|
||||||
|
uint8_t dcc_block_width;
|
||||||
|
uint8_t dcc_block_height;
|
||||||
|
uint8_t dcc_block_depth;
|
||||||
|
|
||||||
/* Displayable DCC. This is always rb_aligned=0 and pipe_aligned=0.
|
/* Displayable DCC. This is always rb_aligned=0 and pipe_aligned=0.
|
||||||
* The 3D engine doesn't support that layout except for chips with 1 RB.
|
* The 3D engine doesn't support that layout except for chips with 1 RB.
|
||||||
* All other chips must set rb_aligned=1.
|
* All other chips must set rb_aligned=1.
|
||||||
|
|
|
@ -419,6 +419,7 @@ static void si_blit_decompress_color(struct si_context *sctx, struct si_texture
|
||||||
first_level, last_level, level_mask);
|
first_level, last_level, level_mask);
|
||||||
|
|
||||||
if (need_dcc_decompress) {
|
if (need_dcc_decompress) {
|
||||||
|
assert(sctx->chip_class == GFX8);
|
||||||
custom_blend = sctx->custom_blend_dcc_decompress;
|
custom_blend = sctx->custom_blend_dcc_decompress;
|
||||||
|
|
||||||
assert(tex->surface.dcc_offset);
|
assert(tex->surface.dcc_offset);
|
||||||
|
@ -834,7 +835,8 @@ void si_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst
|
||||||
!sdst->surface.dcc_offset &&
|
!sdst->surface.dcc_offset &&
|
||||||
!(dst->target != src->target &&
|
!(dst->target != src->target &&
|
||||||
(src->target == PIPE_TEXTURE_1D_ARRAY || dst->target == PIPE_TEXTURE_1D_ARRAY))) {
|
(src->target == PIPE_TEXTURE_1D_ARRAY || dst->target == PIPE_TEXTURE_1D_ARRAY))) {
|
||||||
si_compute_copy_image(sctx, dst, dst_level, src, src_level, dstx, dsty, dstz, src_box);
|
si_compute_copy_image(sctx, dst, dst_level, src, src_level, dstx, dsty, dstz,
|
||||||
|
src_box, false);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1226,8 +1228,29 @@ void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex)
|
||||||
if (!tex->surface.dcc_offset || !sctx->has_graphics)
|
if (!tex->surface.dcc_offset || !sctx->has_graphics)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
si_blit_decompress_color(sctx, tex, 0, tex->buffer.b.b.last_level, 0,
|
if (sctx->chip_class == GFX8) {
|
||||||
util_max_layer(&tex->buffer.b.b, 0), true, false);
|
si_blit_decompress_color(sctx, tex, 0, tex->buffer.b.b.last_level, 0,
|
||||||
|
util_max_layer(&tex->buffer.b.b, 0), true, false);
|
||||||
|
} else {
|
||||||
|
struct pipe_resource *ptex = &tex->buffer.b.b;
|
||||||
|
|
||||||
|
/* DCC decompression using a compute shader. */
|
||||||
|
for (unsigned level = 0; level < tex->surface.num_dcc_levels; level++) {
|
||||||
|
struct pipe_box box;
|
||||||
|
|
||||||
|
u_box_3d(0, 0, 0, u_minify(ptex->width0, level),
|
||||||
|
u_minify(ptex->height0, level),
|
||||||
|
util_num_layers(ptex, level), &box);
|
||||||
|
si_compute_copy_image(sctx, ptex, level, ptex, level, 0, 0, 0, &box,
|
||||||
|
true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now clear DCC metadata to uncompressed. */
|
||||||
|
uint32_t clear_value = DCC_UNCOMPRESSED;
|
||||||
|
si_clear_buffer(sctx, ptex, tex->surface.dcc_offset,
|
||||||
|
tex->surface.dcc_size, &clear_value, 4,
|
||||||
|
SI_COHERENCY_CB_META, false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void si_init_blit_functions(struct si_context *sctx)
|
void si_init_blit_functions(struct si_context *sctx)
|
||||||
|
|
|
@ -376,7 +376,8 @@ void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct p
|
||||||
|
|
||||||
void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_level,
|
void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_level,
|
||||||
struct pipe_resource *src, unsigned src_level, unsigned dstx,
|
struct pipe_resource *src, unsigned src_level, unsigned dstx,
|
||||||
unsigned dsty, unsigned dstz, const struct pipe_box *src_box)
|
unsigned dsty, unsigned dstz, const struct pipe_box *src_box,
|
||||||
|
bool is_dcc_decompress)
|
||||||
{
|
{
|
||||||
struct pipe_context *ctx = &sctx->b;
|
struct pipe_context *ctx = &sctx->b;
|
||||||
unsigned width = src_box->width;
|
unsigned width = src_box->width;
|
||||||
|
@ -396,7 +397,6 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
|
||||||
* we must keep the original values to get the correct results.
|
* we must keep the original values to get the correct results.
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
unsigned data[] = {src_box->x, src_box->y, src_box->z, 0, dstx, dsty, dstz, 0};
|
|
||||||
|
|
||||||
if (width == 0 || height == 0)
|
if (width == 0 || height == 0)
|
||||||
return;
|
return;
|
||||||
|
@ -413,7 +413,6 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
|
||||||
((struct si_texture *)src)->surface.u.gfx9.dcc.pipe_aligned);
|
((struct si_texture *)src)->surface.u.gfx9.dcc.pipe_aligned);
|
||||||
|
|
||||||
struct pipe_constant_buffer saved_cb = {};
|
struct pipe_constant_buffer saved_cb = {};
|
||||||
si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);
|
|
||||||
|
|
||||||
struct si_images *images = &sctx->images[PIPE_SHADER_COMPUTE];
|
struct si_images *images = &sctx->images[PIPE_SHADER_COMPUTE];
|
||||||
struct pipe_image_view saved_image[2] = {0};
|
struct pipe_image_view saved_image[2] = {0};
|
||||||
|
@ -422,10 +421,16 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
|
||||||
|
|
||||||
void *saved_cs = sctx->cs_shader_state.program;
|
void *saved_cs = sctx->cs_shader_state.program;
|
||||||
|
|
||||||
struct pipe_constant_buffer cb = {};
|
if (!is_dcc_decompress) {
|
||||||
cb.buffer_size = sizeof(data);
|
unsigned data[] = {src_box->x, src_box->y, src_box->z, 0, dstx, dsty, dstz, 0};
|
||||||
cb.user_buffer = data;
|
|
||||||
ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &cb);
|
si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);
|
||||||
|
|
||||||
|
struct pipe_constant_buffer cb = {};
|
||||||
|
cb.buffer_size = sizeof(data);
|
||||||
|
cb.user_buffer = data;
|
||||||
|
ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &cb);
|
||||||
|
}
|
||||||
|
|
||||||
struct pipe_image_view image[2] = {0};
|
struct pipe_image_view image[2] = {0};
|
||||||
image[0].resource = src;
|
image[0].resource = src;
|
||||||
|
@ -454,11 +459,44 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
|
||||||
image[0].format = image[1].format = util_format_snorm8_to_sint8(dst->format);
|
image[0].format = image[1].format = util_format_snorm8_to_sint8(dst->format);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (is_dcc_decompress)
|
||||||
|
image[1].access |= SI_IMAGE_ACCESS_DCC_OFF;
|
||||||
|
|
||||||
ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, image);
|
ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, image);
|
||||||
|
|
||||||
struct pipe_grid_info info = {0};
|
struct pipe_grid_info info = {0};
|
||||||
|
|
||||||
if (dst->target == PIPE_TEXTURE_1D_ARRAY && src->target == PIPE_TEXTURE_1D_ARRAY) {
|
if (is_dcc_decompress) {
|
||||||
|
/* The DCC decompression is a normal blit where the load is compressed
|
||||||
|
* and the store is uncompressed. The workgroup size is either equal to
|
||||||
|
* the DCC block size or a multiple thereof. The shader uses a barrier
|
||||||
|
* between loads and stores to safely overwrite each DCC block of pixels.
|
||||||
|
*/
|
||||||
|
struct si_texture *tex = (struct si_texture*)src;
|
||||||
|
unsigned dim[3] = {src_box->width, src_box->height, src_box->depth};
|
||||||
|
|
||||||
|
assert(src == dst);
|
||||||
|
assert(dst->target != PIPE_TEXTURE_1D && dst->target != PIPE_TEXTURE_1D_ARRAY);
|
||||||
|
|
||||||
|
if (!sctx->cs_dcc_decompress)
|
||||||
|
sctx->cs_dcc_decompress = si_create_dcc_decompress_cs(ctx);
|
||||||
|
ctx->bind_compute_state(ctx, sctx->cs_dcc_decompress);
|
||||||
|
|
||||||
|
info.block[0] = tex->surface.u.gfx9.dcc_block_width;
|
||||||
|
info.block[1] = tex->surface.u.gfx9.dcc_block_height;
|
||||||
|
info.block[2] = tex->surface.u.gfx9.dcc_block_depth;
|
||||||
|
|
||||||
|
/* Make sure the block size is at least the same as wave size. */
|
||||||
|
while (info.block[0] * info.block[1] * info.block[2] <
|
||||||
|
sctx->screen->compute_wave_size) {
|
||||||
|
info.block[0] *= 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < 3; i++) {
|
||||||
|
info.last_block[i] = dim[i] % info.block[i];
|
||||||
|
info.grid[i] = DIV_ROUND_UP(dim[i], info.block[i]);
|
||||||
|
}
|
||||||
|
} else if (dst->target == PIPE_TEXTURE_1D_ARRAY && src->target == PIPE_TEXTURE_1D_ARRAY) {
|
||||||
if (!sctx->cs_copy_image_1d_array)
|
if (!sctx->cs_copy_image_1d_array)
|
||||||
sctx->cs_copy_image_1d_array = si_create_copy_image_compute_shader_1d_array(ctx);
|
sctx->cs_copy_image_1d_array = si_create_copy_image_compute_shader_1d_array(ctx);
|
||||||
ctx->bind_compute_state(ctx, sctx->cs_copy_image_1d_array);
|
ctx->bind_compute_state(ctx, sctx->cs_copy_image_1d_array);
|
||||||
|
@ -487,10 +525,12 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
|
||||||
SI_CS_WAIT_FOR_IDLE | SI_CS_IMAGE_OP);
|
SI_CS_WAIT_FOR_IDLE | SI_CS_IMAGE_OP);
|
||||||
|
|
||||||
ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, saved_image);
|
ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, saved_image);
|
||||||
ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);
|
|
||||||
for (int i = 0; i < 2; i++)
|
for (int i = 0; i < 2; i++)
|
||||||
pipe_resource_reference(&saved_image[i].resource, NULL);
|
pipe_resource_reference(&saved_image[i].resource, NULL);
|
||||||
pipe_resource_reference(&saved_cb.buffer, NULL);
|
if (!is_dcc_decompress) {
|
||||||
|
ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);
|
||||||
|
pipe_resource_reference(&saved_cb.buffer, NULL);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void si_retile_dcc(struct si_context *sctx, struct si_texture *tex)
|
void si_retile_dcc(struct si_context *sctx, struct si_texture *tex)
|
||||||
|
|
|
@ -235,6 +235,8 @@ static void si_destroy_context(struct pipe_context *context)
|
||||||
sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_render_target_1d_array);
|
sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_render_target_1d_array);
|
||||||
if (sctx->cs_clear_12bytes_buffer)
|
if (sctx->cs_clear_12bytes_buffer)
|
||||||
sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_12bytes_buffer);
|
sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_12bytes_buffer);
|
||||||
|
if (sctx->cs_dcc_decompress)
|
||||||
|
sctx->b.delete_compute_state(&sctx->b, sctx->cs_dcc_decompress);
|
||||||
if (sctx->cs_dcc_retile)
|
if (sctx->cs_dcc_retile)
|
||||||
sctx->b.delete_compute_state(&sctx->b, sctx->cs_dcc_retile);
|
sctx->b.delete_compute_state(&sctx->b, sctx->cs_dcc_retile);
|
||||||
|
|
||||||
|
|
|
@ -927,6 +927,7 @@ struct si_context {
|
||||||
void *cs_clear_render_target;
|
void *cs_clear_render_target;
|
||||||
void *cs_clear_render_target_1d_array;
|
void *cs_clear_render_target_1d_array;
|
||||||
void *cs_clear_12bytes_buffer;
|
void *cs_clear_12bytes_buffer;
|
||||||
|
void *cs_dcc_decompress;
|
||||||
void *cs_dcc_retile;
|
void *cs_dcc_retile;
|
||||||
void *cs_fmask_expand[3][2]; /* [log2(samples)-1][is_array] */
|
void *cs_fmask_expand[3][2]; /* [log2(samples)-1][is_array] */
|
||||||
struct si_screen *screen;
|
struct si_screen *screen;
|
||||||
|
@ -1316,7 +1317,8 @@ void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct p
|
||||||
uint64_t dst_offset, uint64_t src_offset, unsigned size);
|
uint64_t dst_offset, uint64_t src_offset, unsigned size);
|
||||||
void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_level,
|
void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_level,
|
||||||
struct pipe_resource *src, unsigned src_level, unsigned dstx,
|
struct pipe_resource *src, unsigned src_level, unsigned dstx,
|
||||||
unsigned dsty, unsigned dstz, const struct pipe_box *src_box);
|
unsigned dsty, unsigned dstz, const struct pipe_box *src_box,
|
||||||
|
bool is_dcc_decompress);
|
||||||
void si_compute_clear_render_target(struct pipe_context *ctx, struct pipe_surface *dstsurf,
|
void si_compute_clear_render_target(struct pipe_context *ctx, struct pipe_surface *dstsurf,
|
||||||
const union pipe_color_union *color, unsigned dstx,
|
const union pipe_color_union *color, unsigned dstx,
|
||||||
unsigned dsty, unsigned width, unsigned height,
|
unsigned dsty, unsigned width, unsigned height,
|
||||||
|
@ -1455,6 +1457,7 @@ void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords
|
||||||
bool dst_stream_cache_policy, bool is_copy);
|
bool dst_stream_cache_policy, bool is_copy);
|
||||||
void *si_create_copy_image_compute_shader(struct pipe_context *ctx);
|
void *si_create_copy_image_compute_shader(struct pipe_context *ctx);
|
||||||
void *si_create_copy_image_compute_shader_1d_array(struct pipe_context *ctx);
|
void *si_create_copy_image_compute_shader_1d_array(struct pipe_context *ctx);
|
||||||
|
void *si_create_dcc_decompress_cs(struct pipe_context *ctx);
|
||||||
void *si_clear_render_target_shader(struct pipe_context *ctx);
|
void *si_clear_render_target_shader(struct pipe_context *ctx);
|
||||||
void *si_clear_render_target_shader_1d_array(struct pipe_context *ctx);
|
void *si_clear_render_target_shader_1d_array(struct pipe_context *ctx);
|
||||||
void *si_clear_12bytes_buffer_shader(struct pipe_context *ctx);
|
void *si_clear_12bytes_buffer_shader(struct pipe_context *ctx);
|
||||||
|
|
|
@ -573,6 +573,45 @@ void *si_create_copy_image_compute_shader_1d_array(struct pipe_context *ctx)
|
||||||
return ctx->create_compute_state(ctx, &state);
|
return ctx->create_compute_state(ctx, &state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Create a compute shader implementing DCC decompression via a blit.
|
||||||
|
* This is a trivial copy_image shader except that it has a variable block
|
||||||
|
* size and a barrier.
|
||||||
|
*/
|
||||||
|
void *si_create_dcc_decompress_cs(struct pipe_context *ctx)
|
||||||
|
{
/* Translates the TGSI text below into tokens and returns the compute state
 * produced by ctx->create_compute_state(). Returns NULL if the text cannot
 * be translated.
 */
|
||||||
|
static const char text[] =
|
||||||
|
"COMP\n"
|
||||||
|
"DCL SV[0], THREAD_ID\n"
|
||||||
|
"DCL SV[1], BLOCK_ID\n"
|
||||||
|
"DCL SV[2], BLOCK_SIZE\n"
|
||||||
|
/* NOTE(review): IMAGE[0] is only used as the LOAD source below, yet it is
 * declared WR like IMAGE[1] - presumably intentional; confirm.
 */
"DCL IMAGE[0], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
|
||||||
|
"DCL IMAGE[1], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
|
||||||
|
"DCL TEMP[0..1]\n"
|
||||||
|
|
||||||
|
/* TEMP[0].xyz = BLOCK_ID * BLOCK_SIZE + THREAD_ID; this is the coordinate
 * used for both the LOAD and the STORE of this thread.
 */
"UMAD TEMP[0].xyz, SV[1].xyzz, SV[2].xyzz, SV[0].xyzz\n"
|
||||||
|
/* Read the value at that coordinate from IMAGE[0] into TEMP[1]. */
"LOAD TEMP[1], IMAGE[0], TEMP[0].xyzz, 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
|
||||||
|
/* Wait for the whole threadgroup (= DCC block) to load texels before
|
||||||
|
* overwriting them, because overwriting any pixel within a DCC block
|
||||||
|
* can break compression for the whole block.
|
||||||
|
*/
|
||||||
|
"BARRIER\n"
|
||||||
|
/* Write the loaded value to IMAGE[1] at the same coordinate. */
"STORE IMAGE[1], TEMP[0].xyzz, TEMP[1], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
|
||||||
|
"END\n";
|
||||||
|
|
||||||
|
/* Fixed-size local buffer receiving the translated tokens.
 * NOTE(review): state.prog points at this stack array; presumably
 * create_compute_state() does not retain the pointer after it returns -
 * confirm.
 */
struct tgsi_token tokens[1024];
|
||||||
|
struct pipe_compute_state state = {0};
|
||||||
|
|
||||||
|
/* The text is a compile-time constant, so a translation failure is treated
 * as a programming error: assert, and return NULL if the assert is
 * compiled out.
 */
if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {
|
||||||
|
assert(false);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Hand the translated tokens to the driver as a TGSI compute program. */
state.ir_type = PIPE_SHADER_IR_TGSI;
|
||||||
|
state.prog = tokens;
|
||||||
|
|
||||||
|
return ctx->create_compute_state(ctx, &state);
|
||||||
|
}
|
||||||
|
|
||||||
void *si_clear_render_target_shader(struct pipe_context *ctx)
|
void *si_clear_render_target_shader(struct pipe_context *ctx)
|
||||||
{
|
{
|
||||||
static const char text[] =
|
static const char text[] =
|
||||||
|
|
Loading…
Reference in New Issue