etnaviv: implement buffer compression
Vivante GPUs have lossless buffer compression using the tile-status bits, which can reduce memory access and thus improve performance. This patch only enables compression for "V4" compression GPUs, but the implementation is tested on GC2000(V1) and GC3000(V2). V1/V2 compression looks absolutely useless, so it is not enabled. I couldn't test if this patch breaks MSAA, because it looks like MSAA is already broken. Signed-off-by: Jonathan Marek <jonathan@marek.ca> Reviewed-by: Christian Gmeiner <christian.gmeiner@gmail.com>
This commit is contained in:
parent
f6a0d17abe
commit
5feb8adb0f
|
@ -70,8 +70,8 @@ blt_compute_img_config_bits(const struct blt_imginfo *img, bool for_dest)
|
|||
|
||||
return BLT_IMAGE_CONFIG_TS_MODE(img->ts_mode) |
|
||||
COND(img->use_ts, BLT_IMAGE_CONFIG_TS) |
|
||||
COND(img->compressed, BLT_IMAGE_CONFIG_COMPRESSION) |
|
||||
BLT_IMAGE_CONFIG_COMPRESSION_FORMAT(img->compress_fmt) |
|
||||
COND(img->use_ts && img->ts_compress_fmt >= 0, BLT_IMAGE_CONFIG_COMPRESSION) |
|
||||
BLT_IMAGE_CONFIG_COMPRESSION_FORMAT(img->ts_compress_fmt) |
|
||||
COND(for_dest, BLT_IMAGE_CONFIG_UNK22) |
|
||||
BLT_IMAGE_CONFIG_SWIZ_R(0) | /* not used? */
|
||||
BLT_IMAGE_CONFIG_SWIZ_G(1) |
|
||||
|
@ -211,10 +211,6 @@ etna_blit_clear_color_blt(struct pipe_context *pctx, struct pipe_surface *dst,
|
|||
clr.dest.addr.flags = ETNA_RELOC_WRITE;
|
||||
clr.dest.bpp = util_format_get_blocksize(surf->base.format);
|
||||
clr.dest.stride = surf->surf.stride;
|
||||
/* TODO: color compression
|
||||
clr.dest.compressed = 1;
|
||||
clr.dest.compress_fmt = 3;
|
||||
*/
|
||||
clr.dest.tiling = res->layout;
|
||||
|
||||
if (surf->surf.ts_size) {
|
||||
|
@ -225,6 +221,7 @@ etna_blit_clear_color_blt(struct pipe_context *pctx, struct pipe_surface *dst,
|
|||
clr.dest.ts_clear_value[0] = new_clear_value;
|
||||
clr.dest.ts_clear_value[1] = new_clear_value;
|
||||
clr.dest.ts_mode = surf->level->ts_mode;
|
||||
clr.dest.ts_compress_fmt = surf->level->ts_compress_fmt;
|
||||
}
|
||||
|
||||
clr.clear_value[0] = new_clear_value;
|
||||
|
@ -287,10 +284,6 @@ etna_blit_clear_zs_blt(struct pipe_context *pctx, struct pipe_surface *dst,
|
|||
clr.dest.addr.flags = ETNA_RELOC_WRITE;
|
||||
clr.dest.bpp = util_format_get_blocksize(surf->base.format);
|
||||
clr.dest.stride = surf->surf.stride;
|
||||
#if 0 /* TODO depth compression */
|
||||
clr.dest.compressed = 1;
|
||||
clr.dest.compress_fmt = COLOR_COMPRESSION_FORMAT_D24S8;
|
||||
#endif
|
||||
clr.dest.tiling = res->layout;
|
||||
|
||||
if (surf->surf.ts_size) {
|
||||
|
@ -301,6 +294,7 @@ etna_blit_clear_zs_blt(struct pipe_context *pctx, struct pipe_surface *dst,
|
|||
clr.dest.ts_clear_value[0] = new_clear_value;
|
||||
clr.dest.ts_clear_value[1] = new_clear_value;
|
||||
clr.dest.ts_mode = surf->level->ts_mode;
|
||||
clr.dest.ts_compress_fmt = surf->level->ts_compress_fmt;
|
||||
}
|
||||
|
||||
clr.clear_value[0] = new_clear_value;
|
||||
|
@ -418,12 +412,19 @@ etna_try_blt_blit(struct pipe_context *pctx,
|
|||
struct etna_resource_level *src_lev = &src->levels[blit_info->src.level];
|
||||
struct etna_resource_level *dst_lev = &dst->levels[blit_info->dst.level];
|
||||
|
||||
/* Kick off BLT here */
|
||||
/* if we asked for in-place resolve, return immediately if ts isn't valid
|
||||
* do this check separately because it applies when compression is used, but
|
||||
* we can't use inplace resolve path with compression
|
||||
*/
|
||||
if (src == dst) {
|
||||
/* Resolve-in-place */
|
||||
assert(!memcmp(&blit_info->src, &blit_info->dst, sizeof(blit_info->src)));
|
||||
if (!src_lev->ts_size || !src_lev->ts_valid) /* No TS, no worries */
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Kick off BLT here */
|
||||
if (src == dst && src_lev->ts_compress_fmt < 0) {
|
||||
/* Resolve-in-place */
|
||||
struct blt_inplace_op op = {};
|
||||
|
||||
op.addr.bo = src->bo;
|
||||
|
@ -464,6 +465,7 @@ etna_try_blt_blit(struct pipe_context *pctx,
|
|||
op.src.ts_clear_value[0] = src_lev->clear_value;
|
||||
op.src.ts_clear_value[1] = src_lev->clear_value;
|
||||
op.src.ts_mode = src_lev->ts_mode;
|
||||
op.src.ts_compress_fmt = src_lev->ts_compress_fmt;
|
||||
}
|
||||
|
||||
op.dest.addr.bo = dst->bo;
|
||||
|
@ -471,10 +473,6 @@ etna_try_blt_blit(struct pipe_context *pctx,
|
|||
op.dest.addr.flags = ETNA_RELOC_WRITE;
|
||||
op.dest.format = translate_blt_format(dst_format);
|
||||
op.dest.stride = dst_lev->stride;
|
||||
/* TODO color compression
|
||||
op.dest.compressed = 1;
|
||||
op.dest.compress_fmt = 3;
|
||||
*/
|
||||
op.dest.tiling = dst->layout;
|
||||
const struct util_format_description *dst_format_desc =
|
||||
util_format_description(dst_format);
|
||||
|
|
|
@ -37,17 +37,16 @@ struct pipe_context;
|
|||
/* src/dest info for image operations */
|
||||
struct blt_imginfo
|
||||
{
|
||||
unsigned compressed:1;
|
||||
unsigned use_ts:1;
|
||||
struct etna_reloc addr;
|
||||
struct etna_reloc ts_addr;
|
||||
uint32_t format; /* BLT_FORMAT_* */
|
||||
uint32_t stride;
|
||||
uint32_t compress_fmt; /* COLOR_COMPRESSION_FORMAT_* */
|
||||
enum etna_surface_layout tiling; /* ETNA_LAYOUT_* */
|
||||
uint32_t ts_clear_value[2];
|
||||
uint8_t swizzle[4]; /* TEXTURE_SWIZZLE_* */
|
||||
uint8_t ts_mode; /* TS_MODE_* */
|
||||
int8_t ts_compress_fmt; /* COLOR_COMPRESSION_FORMAT_* */
|
||||
uint8_t endian_mode; /* ENDIAN_MODE_* */
|
||||
uint8_t bpp; /* # bytes per pixel 1/2/4/8 - only used for CLEAR_IMAGE */
|
||||
};
|
||||
|
|
|
@ -87,10 +87,24 @@ etna_screen_resource_alloc_ts(struct pipe_screen *pscreen,
|
|||
size_t rt_ts_size, ts_layer_stride;
|
||||
size_t ts_bits_per_tile, bytes_per_tile;
|
||||
uint8_t ts_mode = TS_MODE_128B; /* only used by halti5 */
|
||||
int8_t ts_compress_fmt;
|
||||
|
||||
assert(!rsc->ts_bo);
|
||||
|
||||
/* pre-v4 compression is largely useless, so disable it when not wanted for MSAA
|
||||
* v4 compression can be enabled everywhere without any known drawback,
|
||||
* except that in-place resolve must go through a slower path
|
||||
*/
|
||||
ts_compress_fmt = (screen->specs.v4_compression || rsc->base.nr_samples > 1) ?
|
||||
translate_ts_format(rsc->base.format) : -1;
|
||||
|
||||
if (screen->specs.halti >= 5) {
|
||||
/* enable 256B ts mode with compression, as it improves performance
|
||||
* the size of the resource might also determine if we want to use it or not
|
||||
*/
|
||||
if (ts_compress_fmt >= 0)
|
||||
ts_mode = TS_MODE_256B;
|
||||
|
||||
ts_bits_per_tile = 4;
|
||||
bytes_per_tile = ts_mode == TS_MODE_256B ? 256 : 128;
|
||||
} else {
|
||||
|
@ -121,6 +135,7 @@ etna_screen_resource_alloc_ts(struct pipe_screen *pscreen,
|
|||
rsc->levels[0].ts_layer_stride = ts_layer_stride;
|
||||
rsc->levels[0].ts_size = rt_ts_size;
|
||||
rsc->levels[0].ts_mode = ts_mode;
|
||||
rsc->levels[0].ts_compress_fmt = ts_compress_fmt;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -52,6 +52,7 @@ struct etna_resource_level {
|
|||
uint32_t clear_value; /* clear value of resource level (mainly for TS) */
|
||||
bool ts_valid;
|
||||
uint8_t ts_mode;
|
||||
int8_t ts_compress_fmt; /* COLOR_COMPRESSION_FORMAT_* (-1 = disable) */
|
||||
|
||||
/* keep track if we have done some per block patching */
|
||||
bool patched;
|
||||
|
|
|
@ -145,7 +145,8 @@ etna_compile_rs_state(struct etna_context *ctx, struct compiled_rs_state *cs,
|
|||
rs->source_stride == rs->dest_stride &&
|
||||
!rs->downsample_x && !rs->downsample_y &&
|
||||
!rs->swap_rb && !rs->flip &&
|
||||
!rs->clear_mode && rs->source_padded_width) {
|
||||
!rs->clear_mode && rs->source_padded_width &&
|
||||
!rs->source_ts_compressed) {
|
||||
/* Total number of tiles (same as for autodisable) */
|
||||
cs->RS_KICKER_INPLACE = rs->tile_count;
|
||||
}
|
||||
|
@ -545,7 +546,6 @@ etna_try_rs_blit(struct pipe_context *pctx,
|
|||
struct etna_resource *src = etna_resource(blit_info->src.resource);
|
||||
struct etna_resource *dst = etna_resource(blit_info->dst.resource);
|
||||
struct compiled_rs_state copy_to_screen;
|
||||
uint32_t ts_mem_config = 0;
|
||||
int msaa_xscale = 1, msaa_yscale = 1;
|
||||
|
||||
/* Ensure that the level is valid */
|
||||
|
@ -661,13 +661,6 @@ etna_try_rs_blit(struct pipe_context *pctx,
|
|||
width & (w_align - 1) || height & (h_align - 1))
|
||||
goto manual;
|
||||
|
||||
if (src->base.nr_samples > 1) {
|
||||
uint32_t ts_format = translate_ts_format(src_format);
|
||||
assert(ts_format != ETNA_NO_MATCH);
|
||||
ts_mem_config |= VIVS_TS_MEM_CONFIG_COLOR_COMPRESSION |
|
||||
VIVS_TS_MEM_CONFIG_COLOR_COMPRESSION_FORMAT(ts_format);
|
||||
}
|
||||
|
||||
/* Always flush color and depth cache together before resolving. This works
|
||||
* around artifacts that appear in some cases when scanning out a texture
|
||||
* directly after it has been rendered to, such as rendering an animated web
|
||||
|
@ -683,18 +676,22 @@ etna_try_rs_blit(struct pipe_context *pctx,
|
|||
VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);
|
||||
etna_stall(ctx->stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
|
||||
|
||||
if (src->levels[blit_info->src.level].ts_size &&
|
||||
src->levels[blit_info->src.level].ts_valid)
|
||||
if (src_lev->ts_size && src_lev->ts_valid)
|
||||
etna_set_state(ctx->stream, VIVS_TS_FLUSH_CACHE, VIVS_TS_FLUSH_CACHE_FLUSH);
|
||||
}
|
||||
|
||||
/* Set up color TS to source surface before blit, if needed */
|
||||
bool source_ts_valid = false;
|
||||
if (src->levels[blit_info->src.level].ts_size &&
|
||||
src->levels[blit_info->src.level].ts_valid) {
|
||||
if (src_lev->ts_size && src_lev->ts_valid) {
|
||||
struct etna_reloc reloc;
|
||||
unsigned ts_offset =
|
||||
src_lev->ts_offset + blit_info->src.box.z * src_lev->ts_layer_stride;
|
||||
uint32_t ts_mem_config = 0;
|
||||
|
||||
if (src_lev->ts_compress_fmt >= 0) {
|
||||
ts_mem_config |= VIVS_TS_MEM_CONFIG_COLOR_COMPRESSION |
|
||||
VIVS_TS_MEM_CONFIG_COLOR_COMPRESSION_FORMAT(src_lev->ts_compress_fmt);
|
||||
}
|
||||
|
||||
etna_set_state(ctx->stream, VIVS_TS_MEM_CONFIG,
|
||||
VIVS_TS_MEM_CONFIG_COLOR_FAST_CLEAR | ts_mem_config);
|
||||
|
@ -712,12 +709,11 @@ etna_try_rs_blit(struct pipe_context *pctx,
|
|||
reloc.flags = ETNA_RELOC_READ;
|
||||
etna_set_state_reloc(ctx->stream, VIVS_TS_COLOR_SURFACE_BASE, &reloc);
|
||||
|
||||
etna_set_state(ctx->stream, VIVS_TS_COLOR_CLEAR_VALUE,
|
||||
src->levels[blit_info->src.level].clear_value);
|
||||
etna_set_state(ctx->stream, VIVS_TS_COLOR_CLEAR_VALUE, src_lev->clear_value);
|
||||
|
||||
source_ts_valid = true;
|
||||
} else {
|
||||
etna_set_state(ctx->stream, VIVS_TS_MEM_CONFIG, ts_mem_config);
|
||||
etna_set_state(ctx->stream, VIVS_TS_MEM_CONFIG, 0);
|
||||
}
|
||||
ctx->dirty |= ETNA_DIRTY_TS;
|
||||
|
||||
|
@ -731,6 +727,7 @@ etna_try_rs_blit(struct pipe_context *pctx,
|
|||
.source_padded_width = src_lev->padded_width,
|
||||
.source_padded_height = src_lev->padded_height,
|
||||
.source_ts_valid = source_ts_valid,
|
||||
.source_ts_compressed = src_lev->ts_compress_fmt >= 0,
|
||||
.dest_format = translate_rs_format(dst_format),
|
||||
.dest_tiling = dst->layout,
|
||||
.dest = dst->bo,
|
||||
|
@ -751,7 +748,7 @@ etna_try_rs_blit(struct pipe_context *pctx,
|
|||
resource_read(ctx, &src->base);
|
||||
resource_written(ctx, &dst->base);
|
||||
dst->seqno++;
|
||||
dst->levels[blit_info->dst.level].ts_valid = false;
|
||||
dst_lev->ts_valid = false;
|
||||
ctx->dirty |= ETNA_DIRTY_DERIVE_TS;
|
||||
|
||||
return true;
|
||||
|
|
|
@ -34,6 +34,7 @@ struct rs_state {
|
|||
uint8_t downsample_x : 1; /* Downsample in x direction */
|
||||
uint8_t downsample_y : 1; /* Downsample in y direction */
|
||||
uint8_t source_ts_valid : 1;
|
||||
uint8_t source_ts_compressed : 1;
|
||||
|
||||
uint8_t source_format; /* RS_FORMAT_XXX */
|
||||
uint8_t source_tiling; /* ETNA_LAYOUT_XXX */
|
||||
|
|
|
@ -176,13 +176,16 @@ etna_set_framebuffer_state(struct pipe_context *pctx,
|
|||
cs->TS_COLOR_SURFACE_BASE.flags = ETNA_RELOC_READ | ETNA_RELOC_WRITE;
|
||||
|
||||
pe_mem_config |= VIVS_PE_MEM_CONFIG_COLOR_TS_MODE(cbuf->level->ts_mode);
|
||||
}
|
||||
|
||||
/* MSAA */
|
||||
if (cbuf->base.texture->nr_samples > 1) {
|
||||
ts_mem_config |=
|
||||
VIVS_TS_MEM_CONFIG_COLOR_COMPRESSION |
|
||||
VIVS_TS_MEM_CONFIG_COLOR_COMPRESSION_FORMAT(translate_ts_format(cbuf->base.format));
|
||||
if (cbuf->level->ts_compress_fmt >= 0) {
|
||||
/* overwrite bit breaks v1/v2 compression */
|
||||
if (!ctx->specs.v4_compression)
|
||||
cs->PE_COLOR_FORMAT &= ~VIVS_PE_COLOR_FORMAT_OVERWRITE;
|
||||
|
||||
ts_mem_config |=
|
||||
VIVS_TS_MEM_CONFIG_COLOR_COMPRESSION |
|
||||
VIVS_TS_MEM_CONFIG_COLOR_COMPRESSION_FORMAT(cbuf->level->ts_compress_fmt);
|
||||
}
|
||||
}
|
||||
|
||||
nr_samples_color = cbuf->base.texture->nr_samples;
|
||||
|
@ -246,16 +249,17 @@ etna_set_framebuffer_state(struct pipe_context *pctx,
|
|||
cs->TS_DEPTH_SURFACE_BASE.flags = ETNA_RELOC_READ | ETNA_RELOC_WRITE;
|
||||
|
||||
pe_mem_config |= VIVS_PE_MEM_CONFIG_DEPTH_TS_MODE(zsbuf->level->ts_mode);
|
||||
|
||||
if (zsbuf->level->ts_compress_fmt >= 0) {
|
||||
ts_mem_config |=
|
||||
VIVS_TS_MEM_CONFIG_DEPTH_COMPRESSION |
|
||||
COND(zsbuf->level->ts_compress_fmt == COMPRESSION_FORMAT_D24S8,
|
||||
VIVS_TS_MEM_CONFIG_STENCIL_ENABLE);
|
||||
}
|
||||
}
|
||||
|
||||
ts_mem_config |= COND(depth_bits == 16, VIVS_TS_MEM_CONFIG_DEPTH_16BPP);
|
||||
|
||||
/* MSAA */
|
||||
if (zsbuf->base.texture->nr_samples > 1)
|
||||
/* XXX VIVS_TS_MEM_CONFIG_DEPTH_COMPRESSION;
|
||||
* Disable without MSAA for now, as it causes corruption in glquake. */
|
||||
ts_mem_config |= VIVS_TS_MEM_CONFIG_DEPTH_COMPRESSION;
|
||||
|
||||
nr_samples_depth = zsbuf->base.texture->nr_samples;
|
||||
} else {
|
||||
cs->PE_DEPTH_CONFIG = VIVS_PE_DEPTH_CONFIG_DEPTH_MODE_NONE;
|
||||
|
|
|
@ -86,7 +86,8 @@ etna_configure_sampler_ts(struct etna_sampler_ts *sts, struct pipe_sampler_view
|
|||
sts->mode = lev->ts_mode;
|
||||
sts->TS_SAMPLER_CONFIG =
|
||||
VIVS_TS_SAMPLER_CONFIG_ENABLE |
|
||||
VIVS_TS_SAMPLER_CONFIG_COMPRESSION_FORMAT(translate_ts_format(rsc->base.format));
|
||||
COND(lev->ts_compress_fmt >= 0, VIVS_TS_SAMPLER_CONFIG_COMPRESSION) |
|
||||
VIVS_TS_SAMPLER_CONFIG_COMPRESSION_FORMAT(lev->ts_compress_fmt);
|
||||
sts->TS_SAMPLER_CLEAR_VALUE = lev->clear_value;
|
||||
sts->TS_SAMPLER_CLEAR_VALUE2 = lev->clear_value; /* To handle 64-bit formats this needs a different value */
|
||||
sts->TS_SAMPLER_STATUS_BASE.bo = rsc->ts_bo;
|
||||
|
@ -123,7 +124,7 @@ etna_can_use_sampler_ts(struct pipe_sampler_view *view, int num)
|
|||
return VIV_FEATURE(screen, chipMinorFeatures2, TEXTURE_TILED_READ) &&
|
||||
num < VIVS_TS_SAMPLER__LEN &&
|
||||
rsc->base.target != PIPE_BUFFER &&
|
||||
translate_ts_format(rsc->base.format) != ETNA_NO_MATCH &&
|
||||
(rsc->levels[0].ts_compress_fmt < 0 || screen->specs.v4_compression) &&
|
||||
view->u.tex.first_level == 0 && MIN2(view->u.tex.last_level, rsc->base.last_level) == 0 &&
|
||||
rsc->levels[0].ts_valid;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue