etnaviv: implement buffer compression

Vivante GPUs have lossless buffer compression using the tile-status bits,
which can reduce memory access and thus improve performance.

This patch only enables compression for "V4" compression GPUs, but the
implementation is tested on GC2000(V1) and GC3000(V2). V1/V2 compression
looks absolutely useless, so it is not enabled.

I couldn't test if this patch breaks MSAA, because it looks like MSAA is
already broken.

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Reviewed-by: Christian Gmeiner <christian.gmeiner@gmail.com>
This commit is contained in:
Jonathan Marek 2019-07-01 18:41:20 -04:00
parent f6a0d17abe
commit 5feb8adb0f
8 changed files with 65 additions and 49 deletions

View File

@ -70,8 +70,8 @@ blt_compute_img_config_bits(const struct blt_imginfo *img, bool for_dest)
return BLT_IMAGE_CONFIG_TS_MODE(img->ts_mode) |
COND(img->use_ts, BLT_IMAGE_CONFIG_TS) |
COND(img->compressed, BLT_IMAGE_CONFIG_COMPRESSION) |
BLT_IMAGE_CONFIG_COMPRESSION_FORMAT(img->compress_fmt) |
COND(img->use_ts && img->ts_compress_fmt >= 0, BLT_IMAGE_CONFIG_COMPRESSION) |
BLT_IMAGE_CONFIG_COMPRESSION_FORMAT(img->ts_compress_fmt) |
COND(for_dest, BLT_IMAGE_CONFIG_UNK22) |
BLT_IMAGE_CONFIG_SWIZ_R(0) | /* not used? */
BLT_IMAGE_CONFIG_SWIZ_G(1) |
@ -211,10 +211,6 @@ etna_blit_clear_color_blt(struct pipe_context *pctx, struct pipe_surface *dst,
clr.dest.addr.flags = ETNA_RELOC_WRITE;
clr.dest.bpp = util_format_get_blocksize(surf->base.format);
clr.dest.stride = surf->surf.stride;
/* TODO: color compression
clr.dest.compressed = 1;
clr.dest.compress_fmt = 3;
*/
clr.dest.tiling = res->layout;
if (surf->surf.ts_size) {
@ -225,6 +221,7 @@ etna_blit_clear_color_blt(struct pipe_context *pctx, struct pipe_surface *dst,
clr.dest.ts_clear_value[0] = new_clear_value;
clr.dest.ts_clear_value[1] = new_clear_value;
clr.dest.ts_mode = surf->level->ts_mode;
clr.dest.ts_compress_fmt = surf->level->ts_compress_fmt;
}
clr.clear_value[0] = new_clear_value;
@ -287,10 +284,6 @@ etna_blit_clear_zs_blt(struct pipe_context *pctx, struct pipe_surface *dst,
clr.dest.addr.flags = ETNA_RELOC_WRITE;
clr.dest.bpp = util_format_get_blocksize(surf->base.format);
clr.dest.stride = surf->surf.stride;
#if 0 /* TODO depth compression */
clr.dest.compressed = 1;
clr.dest.compress_fmt = COLOR_COMPRESSION_FORMAT_D24S8;
#endif
clr.dest.tiling = res->layout;
if (surf->surf.ts_size) {
@ -301,6 +294,7 @@ etna_blit_clear_zs_blt(struct pipe_context *pctx, struct pipe_surface *dst,
clr.dest.ts_clear_value[0] = new_clear_value;
clr.dest.ts_clear_value[1] = new_clear_value;
clr.dest.ts_mode = surf->level->ts_mode;
clr.dest.ts_compress_fmt = surf->level->ts_compress_fmt;
}
clr.clear_value[0] = new_clear_value;
@ -418,12 +412,19 @@ etna_try_blt_blit(struct pipe_context *pctx,
struct etna_resource_level *src_lev = &src->levels[blit_info->src.level];
struct etna_resource_level *dst_lev = &dst->levels[blit_info->dst.level];
/* Kick off BLT here */
/* if we asked for in-place resolve, return immediately if ts isn't valid
* do this check separately because it applies when compression is used, but
* we can't use inplace resolve path with compression
*/
if (src == dst) {
/* Resolve-in-place */
assert(!memcmp(&blit_info->src, &blit_info->dst, sizeof(blit_info->src)));
if (!src_lev->ts_size || !src_lev->ts_valid) /* No TS, no worries */
return true;
}
/* Kick off BLT here */
if (src == dst && src_lev->ts_compress_fmt < 0) {
/* Resolve-in-place */
struct blt_inplace_op op = {};
op.addr.bo = src->bo;
@ -464,6 +465,7 @@ etna_try_blt_blit(struct pipe_context *pctx,
op.src.ts_clear_value[0] = src_lev->clear_value;
op.src.ts_clear_value[1] = src_lev->clear_value;
op.src.ts_mode = src_lev->ts_mode;
op.src.ts_compress_fmt = src_lev->ts_compress_fmt;
}
op.dest.addr.bo = dst->bo;
@ -471,10 +473,6 @@ etna_try_blt_blit(struct pipe_context *pctx,
op.dest.addr.flags = ETNA_RELOC_WRITE;
op.dest.format = translate_blt_format(dst_format);
op.dest.stride = dst_lev->stride;
/* TODO color compression
op.dest.compressed = 1;
op.dest.compress_fmt = 3;
*/
op.dest.tiling = dst->layout;
const struct util_format_description *dst_format_desc =
util_format_description(dst_format);

View File

@ -37,17 +37,16 @@ struct pipe_context;
/* src/dest info for image operations */
struct blt_imginfo
{
unsigned compressed:1;
unsigned use_ts:1;
struct etna_reloc addr;
struct etna_reloc ts_addr;
uint32_t format; /* BLT_FORMAT_* */
uint32_t stride;
uint32_t compress_fmt; /* COLOR_COMPRESSION_FORMAT_* */
enum etna_surface_layout tiling; /* ETNA_LAYOUT_* */
uint32_t ts_clear_value[2];
uint8_t swizzle[4]; /* TEXTURE_SWIZZLE_* */
uint8_t ts_mode; /* TS_MODE_* */
int8_t ts_compress_fmt; /* COLOR_COMPRESSION_FORMAT_* */
uint8_t endian_mode; /* ENDIAN_MODE_* */
uint8_t bpp; /* # bytes per pixel 1/2/4/8 - only used for CLEAR_IMAGE */
};

View File

@ -87,10 +87,24 @@ etna_screen_resource_alloc_ts(struct pipe_screen *pscreen,
size_t rt_ts_size, ts_layer_stride;
size_t ts_bits_per_tile, bytes_per_tile;
uint8_t ts_mode = TS_MODE_128B; /* only used by halti5 */
int8_t ts_compress_fmt;
assert(!rsc->ts_bo);
/* pre-v4 compression is largely useless, so disable it when not wanted for MSAA
* v4 compression can be enabled everywhere without any known drawback,
* except that in-place resolve must go through a slower path
*/
ts_compress_fmt = (screen->specs.v4_compression || rsc->base.nr_samples > 1) ?
translate_ts_format(rsc->base.format) : -1;
if (screen->specs.halti >= 5) {
/* enable 256B ts mode with compression, as it improves performance
* the size of the resource might also determine if we want to use it or not
*/
if (ts_compress_fmt >= 0)
ts_mode = TS_MODE_256B;
ts_bits_per_tile = 4;
bytes_per_tile = ts_mode == TS_MODE_256B ? 256 : 128;
} else {
@ -121,6 +135,7 @@ etna_screen_resource_alloc_ts(struct pipe_screen *pscreen,
rsc->levels[0].ts_layer_stride = ts_layer_stride;
rsc->levels[0].ts_size = rt_ts_size;
rsc->levels[0].ts_mode = ts_mode;
rsc->levels[0].ts_compress_fmt = ts_compress_fmt;
return true;
}

View File

@ -52,6 +52,7 @@ struct etna_resource_level {
uint32_t clear_value; /* clear value of resource level (mainly for TS) */
bool ts_valid;
uint8_t ts_mode;
int8_t ts_compress_fmt; /* COLOR_COMPRESSION_FORMAT_* (-1 = disable) */
/* keep track if we have done some per block patching */
bool patched;

View File

@ -145,7 +145,8 @@ etna_compile_rs_state(struct etna_context *ctx, struct compiled_rs_state *cs,
rs->source_stride == rs->dest_stride &&
!rs->downsample_x && !rs->downsample_y &&
!rs->swap_rb && !rs->flip &&
!rs->clear_mode && rs->source_padded_width) {
!rs->clear_mode && rs->source_padded_width &&
!rs->source_ts_compressed) {
/* Total number of tiles (same as for autodisable) */
cs->RS_KICKER_INPLACE = rs->tile_count;
}
@ -545,7 +546,6 @@ etna_try_rs_blit(struct pipe_context *pctx,
struct etna_resource *src = etna_resource(blit_info->src.resource);
struct etna_resource *dst = etna_resource(blit_info->dst.resource);
struct compiled_rs_state copy_to_screen;
uint32_t ts_mem_config = 0;
int msaa_xscale = 1, msaa_yscale = 1;
/* Ensure that the level is valid */
@ -661,13 +661,6 @@ etna_try_rs_blit(struct pipe_context *pctx,
width & (w_align - 1) || height & (h_align - 1))
goto manual;
if (src->base.nr_samples > 1) {
uint32_t ts_format = translate_ts_format(src_format);
assert(ts_format != ETNA_NO_MATCH);
ts_mem_config |= VIVS_TS_MEM_CONFIG_COLOR_COMPRESSION |
VIVS_TS_MEM_CONFIG_COLOR_COMPRESSION_FORMAT(ts_format);
}
/* Always flush color and depth cache together before resolving. This works
* around artifacts that appear in some cases when scanning out a texture
* directly after it has been rendered to, such as rendering an animated web
@ -683,18 +676,22 @@ etna_try_rs_blit(struct pipe_context *pctx,
VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);
etna_stall(ctx->stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
if (src->levels[blit_info->src.level].ts_size &&
src->levels[blit_info->src.level].ts_valid)
if (src_lev->ts_size && src_lev->ts_valid)
etna_set_state(ctx->stream, VIVS_TS_FLUSH_CACHE, VIVS_TS_FLUSH_CACHE_FLUSH);
}
/* Set up color TS to source surface before blit, if needed */
bool source_ts_valid = false;
if (src->levels[blit_info->src.level].ts_size &&
src->levels[blit_info->src.level].ts_valid) {
if (src_lev->ts_size && src_lev->ts_valid) {
struct etna_reloc reloc;
unsigned ts_offset =
src_lev->ts_offset + blit_info->src.box.z * src_lev->ts_layer_stride;
uint32_t ts_mem_config = 0;
if (src_lev->ts_compress_fmt >= 0) {
ts_mem_config |= VIVS_TS_MEM_CONFIG_COLOR_COMPRESSION |
VIVS_TS_MEM_CONFIG_COLOR_COMPRESSION_FORMAT(src_lev->ts_compress_fmt);
}
etna_set_state(ctx->stream, VIVS_TS_MEM_CONFIG,
VIVS_TS_MEM_CONFIG_COLOR_FAST_CLEAR | ts_mem_config);
@ -712,12 +709,11 @@ etna_try_rs_blit(struct pipe_context *pctx,
reloc.flags = ETNA_RELOC_READ;
etna_set_state_reloc(ctx->stream, VIVS_TS_COLOR_SURFACE_BASE, &reloc);
etna_set_state(ctx->stream, VIVS_TS_COLOR_CLEAR_VALUE,
src->levels[blit_info->src.level].clear_value);
etna_set_state(ctx->stream, VIVS_TS_COLOR_CLEAR_VALUE, src_lev->clear_value);
source_ts_valid = true;
} else {
etna_set_state(ctx->stream, VIVS_TS_MEM_CONFIG, ts_mem_config);
etna_set_state(ctx->stream, VIVS_TS_MEM_CONFIG, 0);
}
ctx->dirty |= ETNA_DIRTY_TS;
@ -731,6 +727,7 @@ etna_try_rs_blit(struct pipe_context *pctx,
.source_padded_width = src_lev->padded_width,
.source_padded_height = src_lev->padded_height,
.source_ts_valid = source_ts_valid,
.source_ts_compressed = src_lev->ts_compress_fmt >= 0,
.dest_format = translate_rs_format(dst_format),
.dest_tiling = dst->layout,
.dest = dst->bo,
@ -751,7 +748,7 @@ etna_try_rs_blit(struct pipe_context *pctx,
resource_read(ctx, &src->base);
resource_written(ctx, &dst->base);
dst->seqno++;
dst->levels[blit_info->dst.level].ts_valid = false;
dst_lev->ts_valid = false;
ctx->dirty |= ETNA_DIRTY_DERIVE_TS;
return true;

View File

@ -34,6 +34,7 @@ struct rs_state {
uint8_t downsample_x : 1; /* Downsample in x direction */
uint8_t downsample_y : 1; /* Downsample in y direction */
uint8_t source_ts_valid : 1;
uint8_t source_ts_compressed : 1;
uint8_t source_format; /* RS_FORMAT_XXX */
uint8_t source_tiling; /* ETNA_LAYOUT_XXX */

View File

@ -176,13 +176,16 @@ etna_set_framebuffer_state(struct pipe_context *pctx,
cs->TS_COLOR_SURFACE_BASE.flags = ETNA_RELOC_READ | ETNA_RELOC_WRITE;
pe_mem_config |= VIVS_PE_MEM_CONFIG_COLOR_TS_MODE(cbuf->level->ts_mode);
}
/* MSAA */
if (cbuf->base.texture->nr_samples > 1) {
ts_mem_config |=
VIVS_TS_MEM_CONFIG_COLOR_COMPRESSION |
VIVS_TS_MEM_CONFIG_COLOR_COMPRESSION_FORMAT(translate_ts_format(cbuf->base.format));
if (cbuf->level->ts_compress_fmt >= 0) {
/* overwrite bit breaks v1/v2 compression */
if (!ctx->specs.v4_compression)
cs->PE_COLOR_FORMAT &= ~VIVS_PE_COLOR_FORMAT_OVERWRITE;
ts_mem_config |=
VIVS_TS_MEM_CONFIG_COLOR_COMPRESSION |
VIVS_TS_MEM_CONFIG_COLOR_COMPRESSION_FORMAT(cbuf->level->ts_compress_fmt);
}
}
nr_samples_color = cbuf->base.texture->nr_samples;
@ -246,16 +249,17 @@ etna_set_framebuffer_state(struct pipe_context *pctx,
cs->TS_DEPTH_SURFACE_BASE.flags = ETNA_RELOC_READ | ETNA_RELOC_WRITE;
pe_mem_config |= VIVS_PE_MEM_CONFIG_DEPTH_TS_MODE(zsbuf->level->ts_mode);
if (zsbuf->level->ts_compress_fmt >= 0) {
ts_mem_config |=
VIVS_TS_MEM_CONFIG_DEPTH_COMPRESSION |
COND(zsbuf->level->ts_compress_fmt == COMPRESSION_FORMAT_D24S8,
VIVS_TS_MEM_CONFIG_STENCIL_ENABLE);
}
}
ts_mem_config |= COND(depth_bits == 16, VIVS_TS_MEM_CONFIG_DEPTH_16BPP);
/* MSAA */
if (zsbuf->base.texture->nr_samples > 1)
/* XXX VIVS_TS_MEM_CONFIG_DEPTH_COMPRESSION;
* Disable without MSAA for now, as it causes corruption in glquake. */
ts_mem_config |= VIVS_TS_MEM_CONFIG_DEPTH_COMPRESSION;
nr_samples_depth = zsbuf->base.texture->nr_samples;
} else {
cs->PE_DEPTH_CONFIG = VIVS_PE_DEPTH_CONFIG_DEPTH_MODE_NONE;

View File

@ -86,7 +86,8 @@ etna_configure_sampler_ts(struct etna_sampler_ts *sts, struct pipe_sampler_view
sts->mode = lev->ts_mode;
sts->TS_SAMPLER_CONFIG =
VIVS_TS_SAMPLER_CONFIG_ENABLE |
VIVS_TS_SAMPLER_CONFIG_COMPRESSION_FORMAT(translate_ts_format(rsc->base.format));
COND(lev->ts_compress_fmt >= 0, VIVS_TS_SAMPLER_CONFIG_COMPRESSION) |
VIVS_TS_SAMPLER_CONFIG_COMPRESSION_FORMAT(lev->ts_compress_fmt);
sts->TS_SAMPLER_CLEAR_VALUE = lev->clear_value;
sts->TS_SAMPLER_CLEAR_VALUE2 = lev->clear_value; /* To handle 64-bit formats this needs a different value */
sts->TS_SAMPLER_STATUS_BASE.bo = rsc->ts_bo;
@ -123,7 +124,7 @@ etna_can_use_sampler_ts(struct pipe_sampler_view *view, int num)
return VIV_FEATURE(screen, chipMinorFeatures2, TEXTURE_TILED_READ) &&
num < VIVS_TS_SAMPLER__LEN &&
rsc->base.target != PIPE_BUFFER &&
translate_ts_format(rsc->base.format) != ETNA_NO_MATCH &&
(rsc->levels[0].ts_compress_fmt < 0 || screen->specs.v4_compression) &&
view->u.tex.first_level == 0 && MIN2(view->u.tex.last_level, rsc->base.last_level) == 0 &&
rsc->levels[0].ts_valid;
}