From 059da344ec17853bb503a7e4afa229c2e2a98c83 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 7 Nov 2015 22:13:16 -0500 Subject: [PATCH] freedreno/a3xx: add fake RGTC support (required for GL3) Also throw in LATC while we're at it (same exact format). This could be made more efficient by keeping a shadow compressed texture to use for returning at map time. However... it's not worth it for now... presumably compressed textures are not updated often. Lastly fix up Z32S8 transfers to non-0 layers. Signed-off-by: Ilia Mirkin Signed-off-by: Rob Clark --- docs/relnotes/11.1.0.html | 1 + .../drivers/freedreno/a3xx/fd3_format.c | 20 ++ .../drivers/freedreno/a3xx/fd3_format.h | 1 + .../drivers/freedreno/a3xx/fd3_texture.c | 2 +- .../drivers/freedreno/freedreno_resource.c | 175 +++++++++++++++--- .../drivers/freedreno/freedreno_texture.c | 4 + 6 files changed, 176 insertions(+), 27 deletions(-) diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html index 0075a00ad88..28fec7e89c4 100644 --- a/docs/relnotes/11.1.0.html +++ b/docs/relnotes/11.1.0.html @@ -60,6 +60,7 @@ Note: some of the new features are only available with certain drivers.
  • GL_ARB_vertex_type_2_10_10_10_rev on freedreno (a3xx)
  • GL_EXT_buffer_storage implemented for when ES 3.1 support is gained
  • GL_EXT_draw_elements_base_vertex on all drivers
  • +
  • GL_EXT_texture_compression_rgtc / latc on freedreno (a3xx)
  • GL_OES_draw_elements_base_vertex on all drivers
  • EGL_KHR_create_context on softpipe, llvmpipe
  • EGL_KHR_gl_colorspace on softpipe, llvmpipe
  • diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c index 9b313b598a8..52ea9444517 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c @@ -275,6 +275,16 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { _T(DXT3_SRGBA, DXT3, NONE, WZYX), _T(DXT5_RGBA, DXT5, NONE, WZYX), _T(DXT5_SRGBA, DXT5, NONE, WZYX), + + /* faked */ + _T(RGTC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX), + _T(RGTC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX), + _T(RGTC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX), + _T(RGTC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX), + _T(LATC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX), + _T(LATC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX), + _T(LATC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX), + _T(LATC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX), }; enum a3xx_vtx_fmt @@ -314,6 +324,8 @@ fd3_pipe2fetchsize(enum pipe_format format) { if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) format = PIPE_FORMAT_Z32_FLOAT; + else if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) + format = PIPE_FORMAT_R8G8B8A8_UNORM; switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) { case 8: return TFETCH_1_BYTE; case 16: return TFETCH_2_BYTE; @@ -328,6 +340,14 @@ fd3_pipe2fetchsize(enum pipe_format format) } } +unsigned +fd3_pipe2nblocksx(enum pipe_format format, unsigned width) +{ + if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) + format = PIPE_FORMAT_R8G8B8A8_UNORM; + return util_format_get_nblocksx(format, width); +} + /* we need to special case a bit the depth/stencil restore, because we are * using the texture sampler to blit into the depth/stencil buffer, *not* * into a color buffer. Otherwise fd3_tex_swiz() will do the wrong thing, diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.h b/src/gallium/drivers/freedreno/a3xx/fd3_format.h index 05c5ea3d247..48c503e9a82 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_format.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.h @@ -37,6 +37,7 @@ enum a3xx_color_fmt fd3_pipe2color(enum pipe_format format); enum pipe_format fd3_gmem_restore_format(enum pipe_format format); enum a3xx_color_fmt fd3_fs_output_format(enum pipe_format format); enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format); +unsigned fd3_pipe2nblocksx(enum pipe_format format, unsigned width); uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c index 2d6ecb2c050..15e63e7d478 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c @@ -240,7 +240,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); /* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */ so->texconst2 = - A3XX_TEX_CONST_2_PITCH(util_format_get_nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp); + A3XX_TEX_CONST_2_PITCH(fd3_pipe2nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp); switch (prsc->target) { case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_2D_ARRAY: diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index 98de0969cab..6e22e39f52e 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -27,6 +27,7 @@ */ #include "util/u_format.h" +#include "util/u_format_rgtc.h" #include "util/u_format_zs.h" #include "util/u_inlines.h" #include "util/u_transfer.h" @@ -111,11 +112,19 @@ realloc_bo(struct fd_resource *rsc, uint32_t size) util_range_set_empty(&rsc->valid_buffer_range); } -/* Currently this is only used for flushing Z32_S8 texture transfers, but - * eventually it should handle everything. - */ +static unsigned +fd_resource_layer_offset(struct fd_resource *rsc, + struct fd_resource_slice *slice, + unsigned layer) +{ + if (rsc->layer_first) + return layer * rsc->layer_size; + else + return layer * slice->size0; +} + static void -fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) +fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box) { struct fd_resource *rsc = fd_resource(trans->base.resource); struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level); @@ -123,13 +132,12 @@ fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) enum pipe_format format = trans->base.resource->format; float *depth = fd_bo_map(rsc->bo) + slice->offset + + fd_resource_layer_offset(rsc, slice, trans->base.box.z) + (trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4; uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset + + fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) + (trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x; - assert(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || - format == PIPE_FORMAT_X32_S8X24_UINT); - if (format != PIPE_FORMAT_X32_S8X24_UINT) util_format_z32_float_s8x24_uint_unpack_z_float( depth, slice->pitch * 4, @@ -142,6 +150,73 @@ fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) box->width, box->height); } +static void +fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box) +{ + struct fd_resource *rsc = fd_resource(trans->base.resource); + struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level); + enum pipe_format format = trans->base.resource->format; + + uint8_t *data = fd_bo_map(rsc->bo) + slice->offset + + fd_resource_layer_offset(rsc, slice, trans->base.box.z) + + ((trans->base.box.y + box->y) * slice->pitch + + trans->base.box.x + box->x) * rsc->cpp; + + uint8_t *source = trans->staging + + util_format_get_nblocksy(format, box->y) * trans->base.stride + + util_format_get_stride(format, box->x); + + switch (format) { + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: + util_format_rgtc1_unorm_unpack_rgba_8unorm( + data, slice->pitch * rsc->cpp, + source, trans->base.stride, + box->width, box->height); + break; + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC2_UNORM: + case PIPE_FORMAT_LATC2_SNORM: + util_format_rgtc2_unorm_unpack_rgba_8unorm( + data, slice->pitch * rsc->cpp, + source, trans->base.stride, + box->width, box->height); + break; + default: + assert(!"Unexpected format\n"); + break; + } +} + +static void +fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) +{ + enum pipe_format format = trans->base.resource->format; + + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + case PIPE_FORMAT_X32_S8X24_UINT: + fd_resource_flush_z32s8(trans, box); + break; + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: + case PIPE_FORMAT_LATC2_UNORM: + case PIPE_FORMAT_LATC2_SNORM: + fd_resource_flush_rgtc(trans, box); + break; + default: + assert(!"Unexpected staging transfer type"); + break; + } +} + static void fd_resource_transfer_flush_region(struct pipe_context *pctx, struct pipe_transfer *ptrans, const struct pipe_box *box) @@ -267,20 +342,15 @@ fd_resource_transfer_map(struct pipe_context *pctx, return NULL; } - if (rsc->layer_first) { - offset = slice->offset + - box->y / util_format_get_blockheight(format) * ptrans->stride + - box->x / util_format_get_blockwidth(format) * rsc->cpp + - box->z * rsc->layer_size; - } else { - offset = slice->offset + - box->y / util_format_get_blockheight(format) * ptrans->stride + - box->x / util_format_get_blockwidth(format) * rsc->cpp + - box->z * slice->size0; - } + offset = slice->offset + + box->y / util_format_get_blockheight(format) * ptrans->stride + + box->x / util_format_get_blockwidth(format) * rsc->cpp + + fd_resource_layer_offset(rsc, slice, box->z); if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || prsc->format == PIPE_FORMAT_X32_S8X24_UINT) { + assert(trans->base.box.depth == 1); + trans->base.stride = trans->base.box.width * rsc->cpp * 2; trans->staging = malloc(trans->base.stride * trans->base.box.height); if (!trans->staging) @@ -298,8 +368,10 @@ fd_resource_transfer_map(struct pipe_context *pctx, goto fail; float *depth = (float *)(buf + slice->offset + + fd_resource_layer_offset(rsc, slice, box->z) + box->y * slice->pitch * 4 + box->x * 4); uint8_t *stencil = sbuf + sslice->offset + + fd_resource_layer_offset(rsc->stencil, sslice, box->z) + box->y * sslice->pitch + box->x; if (format != PIPE_FORMAT_X32_S8X24_UINT) @@ -314,6 +386,53 @@ fd_resource_transfer_map(struct pipe_context *pctx, box->width, box->height); } + buf = trans->staging; + offset = 0; + } else if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) { + assert(trans->base.box.depth == 1); + + trans->base.stride = util_format_get_stride( + format, trans->base.box.width); + trans->staging = malloc( + util_format_get_2d_size(format, trans->base.stride, + trans->base.box.height)); + if (!trans->staging) + goto fail; + + /* if we're not discarding the whole range (or resource), we must copy + * the real data in. + */ + if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | + PIPE_TRANSFER_DISCARD_RANGE))) { + uint8_t *rgba8 = (uint8_t *)buf + slice->offset + + fd_resource_layer_offset(rsc, slice, box->z) + + box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp; + + switch (format) { + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: + util_format_rgtc1_unorm_pack_rgba_8unorm( + trans->staging, trans->base.stride, + rgba8, slice->pitch * rsc->cpp, + box->width, box->height); + break; + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC2_UNORM: + case PIPE_FORMAT_LATC2_SNORM: + util_format_rgtc2_unorm_pack_rgba_8unorm( + trans->staging, trans->base.stride, + rgba8, slice->pitch * rsc->cpp, + box->width, box->height); + break; + default: + assert(!"Unexpected format"); + break; + } + } + buf = trans->staging; offset = 0; } @@ -361,7 +480,7 @@ static const struct u_resource_vtbl fd_resource_vtbl = { }; static uint32_t -setup_slices(struct fd_resource *rsc, uint32_t alignment) +setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format) { struct pipe_resource *prsc = &rsc->base.b; uint32_t level, size = 0; @@ -379,7 +498,7 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment) slice->pitch = width = align(width, 32); slice->offset = size; - blocks = util_format_get_nblocks(prsc->format, width, height); + blocks = util_format_get_nblocks(format, width, height); /* 1d array and 2d array textures must all have the same layer size * for each miplevel on a3xx. 3d textures can have different layer * sizes for high levels, but the hw auto-sizer is buggy (or at least @@ -430,11 +549,12 @@ fd_resource_create(struct pipe_screen *pscreen, { struct fd_resource *rsc = CALLOC_STRUCT(fd_resource); struct pipe_resource *prsc = &rsc->base.b; + enum pipe_format format = tmpl->format; uint32_t size; DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, " "nr_samples=%u, usage=%u, bind=%x, flags=%x", - tmpl->target, util_format_name(tmpl->format), + tmpl->target, util_format_name(format), tmpl->width0, tmpl->height0, tmpl->depth0, tmpl->array_size, tmpl->last_level, tmpl->nr_samples, tmpl->usage, tmpl->bind, tmpl->flags); @@ -451,10 +571,13 @@ fd_resource_create(struct pipe_screen *pscreen, util_range_init(&rsc->valid_buffer_range); rsc->base.vtbl = &fd_resource_vtbl; - if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) - rsc->cpp = util_format_get_blocksize(PIPE_FORMAT_Z32_FLOAT); - else - rsc->cpp = util_format_get_blocksize(tmpl->format); + + if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + format = PIPE_FORMAT_Z32_FLOAT; + else if (util_format_description(format)->layout == + UTIL_FORMAT_LAYOUT_RGTC) + format = PIPE_FORMAT_R8G8B8A8_UNORM; + rsc->cpp = util_format_get_blocksize(format); assert(rsc->cpp); @@ -469,7 +592,7 @@ fd_resource_create(struct pipe_screen *pscreen, } } - size = setup_slices(rsc, slice_alignment(pscreen, tmpl)); + size = setup_slices(rsc, slice_alignment(pscreen, tmpl), format); if (rsc->layer_first) { rsc->layer_size = align(size, 4096); diff --git a/src/gallium/drivers/freedreno/freedreno_texture.c b/src/gallium/drivers/freedreno/freedreno_texture.c index 04e4643b4c9..f03b65b0ae5 100644 --- a/src/gallium/drivers/freedreno/freedreno_texture.c +++ b/src/gallium/drivers/freedreno/freedreno_texture.c @@ -212,6 +212,10 @@ fd_setup_border_colors(struct fd_texture_stateobj *tex, void *ptr, if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER) size = 16; + /* We fake RGTC as if it were RGBA8 */ + if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) + size = 8; + if (chan->pure_integer && size > 16) bcolor32[desc->swizzle[j] + 4] = sampler->border_color.i[j];