mesa/src/panfrost/lib/pan_texture.c

615 lines
24 KiB
C

/*
* Copyright (C) 2008 VMware, Inc.
* Copyright (C) 2014 Broadcom
* Copyright (C) 2018-2019 Alyssa Rosenzweig
* Copyright (C) 2019-2020 Collabora, Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include "util/macros.h"
#include "util/u_math.h"
#include "pan_texture.h"
#if PAN_ARCH >= 5
/*
* Arm Scalable Texture Compression (ASTC) corresponds to just a few formats.
* The block dimension is not part of the format. Instead, it is encoded as a
* 6-bit tag on the payload pointer. Map the block size for a single dimension.
*/
/* Hardware encoding of the block size along one axis of a 2D ASTC block.
 * Only the sizes tested below are accepted; any other value is treated as
 * unreachable.
 */
static inline enum mali_astc_2d_dimension
panfrost_astc_dim_2d(unsigned dim)
{
        if (dim == 4)
                return MALI_ASTC_2D_DIMENSION_4;

        if (dim == 5)
                return MALI_ASTC_2D_DIMENSION_5;

        if (dim == 6)
                return MALI_ASTC_2D_DIMENSION_6;

        if (dim == 8)
                return MALI_ASTC_2D_DIMENSION_8;

        if (dim == 10)
                return MALI_ASTC_2D_DIMENSION_10;

        if (dim == 12)
                return MALI_ASTC_2D_DIMENSION_12;

        unreachable("Invalid ASTC dimension");
}
/* Hardware encoding of the block size along one axis of a 3D ASTC block.
 * Only the sizes tested below are accepted; any other value is treated as
 * unreachable.
 */
static inline enum mali_astc_3d_dimension
panfrost_astc_dim_3d(unsigned dim)
{
        if (dim == 3)
                return MALI_ASTC_3D_DIMENSION_3;

        if (dim == 4)
                return MALI_ASTC_3D_DIMENSION_4;

        if (dim == 5)
                return MALI_ASTC_3D_DIMENSION_5;

        if (dim == 6)
                return MALI_ASTC_3D_DIMENSION_6;

        unreachable("Invalid ASTC dimension");
}
#endif
/* Compute the tag describing compressed data that is OR'ed into a texture
 * surface address.
 *
 *  - AFBC: a set of MALI_AFBC_SURFACE_FLAG_* bits derived from the modifier.
 *    The YTR bit says whether the colorspace transform is enabled (RGB and
 *    RGBA only); newer architectures add prefetch, wide block, tiled header
 *    and payload range check bits.
 *  - ASTC: a "stretch factor" encoding the block size.
 *
 * desc:     description of the format being sampled (used for ASTC)
 * dim:      dimension of the underlying resource
 * modifier: DRM format modifier of the resource
 *
 * Tags are only produced on v5 through v8; in every other case 0 is
 * returned.
 */
static unsigned
panfrost_compression_tag(const struct util_format_description *desc,
                         enum mali_texture_dimension dim,
                         uint64_t modifier)
{
#if PAN_ARCH >= 5 && PAN_ARCH <= 8
        if (drm_is_afbc(modifier)) {
                unsigned tag = 0;

                if (modifier & AFBC_FORMAT_MOD_YTR)
                        tag |= MALI_AFBC_SURFACE_FLAG_YTR;

#if PAN_ARCH >= 6
                /* Prefetch enable */
                tag |= MALI_AFBC_SURFACE_FLAG_PREFETCH;

                if (panfrost_afbc_is_wide(modifier))
                        tag |= MALI_AFBC_SURFACE_FLAG_WIDE_BLOCK;
#endif

#if PAN_ARCH >= 7
                /* Tiled headers */
                if (modifier & AFBC_FORMAT_MOD_TILED)
                        tag |= MALI_AFBC_SURFACE_FLAG_TILED_HEADER;

                /* Used to make sure AFBC headers don't point outside the AFBC
                 * body. HW is using the AFBC surface stride to do this check,
                 * which doesn't work for 3D textures because the surface
                 * stride does not cover the body. Only supported on v7+.
                 */
                if (dim != MALI_TEXTURE_DIMENSION_3D)
                        tag |= MALI_AFBC_SURFACE_FLAG_CHECK_PAYLOAD_RANGE;
#endif

                return tag;
        }

        if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
                if (desc->block.depth > 1) {
                        /* 3D blocks: 2 bits per axis, depth in the top bits */
                        const unsigned z = panfrost_astc_dim_3d(desc->block.depth);
                        const unsigned y = panfrost_astc_dim_3d(desc->block.height);
                        const unsigned x = panfrost_astc_dim_3d(desc->block.width);

                        return (z << 4) | (y << 2) | x;
                }

                /* 2D blocks: 3 bits per axis, height in the top bits */
                const unsigned y = panfrost_astc_dim_2d(desc->block.height);
                const unsigned x = panfrost_astc_dim_2d(desc->block.width);

                return (y << 3) | x;
        }
#endif

        /* Tags are not otherwise used */
        return 0;
}
/* Split a flat cube layer range into a face range and a cube (layer) range.
 *
 * Cubemaps have 6 faces as "layers" in between each actual layer, so a flat
 * index is (layer * 6) + face. On return, *first_layer and *last_layer hold
 * the indices divided by 6, while *first_face and *last_face hold the
 * remainders.
 *
 * TODO: logic wrong in the asserted out cases (a range crossing several
 * cubes that does not start on face 0 and end on face 5) ... can they
 * happen, perhaps from cubemap arrays?
 */
static void
panfrost_adjust_cube_dimensions(
                unsigned *first_face, unsigned *last_face,
                unsigned *first_layer, unsigned *last_layer)
{
        const unsigned flat_first = *first_layer;
        const unsigned flat_last = *last_layer;

        *first_face = flat_first % 6;
        *last_face = flat_last % 6;

        *first_layer = flat_first / 6;
        *last_layer = flat_last / 6;

        assert((*first_layer == *last_layer) || (*first_face == 0 && *last_face == 5));
}
/* Count the descriptors that follow the texture descriptor: one for every
 * (level, layer, face, sample) combination in the given inclusive ranges.
 * For cubes the flat layer range is first split into faces and layers. A
 * sample count of 0 is counted as a single sample.
 */
static unsigned
panfrost_texture_num_elements(
                unsigned first_level, unsigned last_level,
                unsigned first_layer, unsigned last_layer,
                unsigned nr_samples, bool is_cube)
{
        unsigned face_lo = 0, face_hi = 0;

        if (is_cube) {
                panfrost_adjust_cube_dimensions(&face_lo, &face_hi,
                                                &first_layer, &last_layer);
        }

        unsigned count = last_level - first_level + 1;

        count *= last_layer - first_layer + 1;
        count *= face_hi - face_lo + 1;
        count *= MAX2(nr_samples, 1);

        return count;
}
/* A priori upper bound on the size of the payload for an image view.
 *
 * The result is the per-descriptor size times the number of descriptors the
 * view needs. In the average case this equals the actual size; in the worst
 * case it is off by 2x (if a manual stride is not needed on a linear
 * texture). The returned value must be greater than or equal to the actual
 * size, so it is safe to use as an allocation amount.
 */
unsigned
GENX(panfrost_estimate_texture_payload_size)(const struct pan_image_view *iview)
{
        const bool is_cube = iview->dim == MALI_TEXTURE_DIMENSION_CUBE;

        const unsigned nr_descs =
                panfrost_texture_num_elements(iview->first_level, iview->last_level,
                                              iview->first_layer, iview->last_layer,
                                              iview->image->layout.nr_samples,
                                              is_cube);

#if PAN_ARCH >= 9
        const size_t desc_size = pan_size(PLANE);
#else
        /* Assume worst case. Overestimates on Midgard, but that's ok. */
        const size_t desc_size = pan_size(SURFACE_WITH_STRIDE);
#endif

        return desc_size * nr_descs;
}
/* Cursor over the surfaces described by a texture payload.
 *
 * Each dimension keeps its current position and its inclusive last value.
 * Levels, faces and samples also remember their first value so they can be
 * rewound when an outer dimension advances; the layer never rewinds, so it
 * has no first_layer.
 */
struct panfrost_surface_iter {
        unsigned layer, last_layer;
        unsigned level, first_level, last_level;
        unsigned face, first_face, last_face;
        unsigned sample, first_sample, last_sample;
};

/* Position the cursor on the first surface of the given inclusive ranges.
 *
 * iter:        cursor to initialize
 * first_layer, last_layer: layer range
 * first_level, last_level: mip level range
 * first_face, last_face:   face range
 * nr_samples:  number of samples per surface; samples [0, nr_samples - 1]
 *              are visited. A count of 0 is treated like 1, matching how
 *              panfrost_texture_num_elements() sizes the payload.
 */
static void
panfrost_surface_iter_begin(struct panfrost_surface_iter *iter,
                            unsigned first_layer, unsigned last_layer,
                            unsigned first_level, unsigned last_level,
                            unsigned first_face, unsigned last_face,
                            unsigned nr_samples)
{
        iter->layer = first_layer;
        iter->last_layer = last_layer;

        iter->level = iter->first_level = first_level;
        iter->last_level = last_level;

        iter->face = iter->first_face = first_face;
        iter->last_face = last_face;

        iter->sample = iter->first_sample = 0;

        /* Without the clamp, nr_samples == 0 would wrap to UINT_MAX and the
         * iteration would emit far more surfaces than the payload holds.
         */
        iter->last_sample = nr_samples ? nr_samples - 1 : 0;
}
/* True once the cursor has moved past the last layer, i.e. every surface has
 * been visited.
 */
static bool
panfrost_surface_iter_end(const struct panfrost_surface_iter *iter)
{
        const bool in_range = iter->layer <= iter->last_layer;

        return !in_range;
}
/* Advance one dimension of the surface cursor. Returns true if *cur could be
 * moved forward within [first, last]; otherwise rewinds *cur to first and
 * returns false so the caller carries into the next outer dimension.
 */
static bool
panfrost_surface_iter_step(unsigned *cur, unsigned first, unsigned last)
{
        if (*cur < last) {
                (*cur)++;
                return true;
        }

        *cur = first;
        return false;
}

/* Move the cursor to the next surface.
 *
 * Dimensions are nested from innermost to outermost as follows:
 *   v7 and later: level, sample, face, layer
 *   before v7:    sample, face, level, layer
 * Once every inner dimension has wrapped, the layer is incremented; it is
 * never rewound, which is what panfrost_surface_iter_end() relies on.
 */
static void
panfrost_surface_iter_next(struct panfrost_surface_iter *iter)
{
        /* Ordering is different on v7: inner loop is iterating on levels */
        const bool levels_innermost = PAN_ARCH >= 7;

        if (levels_innermost &&
            panfrost_surface_iter_step(&iter->level, iter->first_level,
                                       iter->last_level))
                return;

        if (panfrost_surface_iter_step(&iter->sample, iter->first_sample,
                                       iter->last_sample))
                return;

        if (panfrost_surface_iter_step(&iter->face, iter->first_face,
                                       iter->last_face))
                return;

        if (!levels_innermost &&
            panfrost_surface_iter_step(&iter->level, iter->first_level,
                                       iter->last_level))
                return;

        iter->layer++;
}
/* Fetch the row and surface strides of mip level `l` as they are written
 * into a surface descriptor.
 *
 * layout:      image layout holding the per-level slice information
 * l:           mip level
 * row_stride:  out, row stride (forced to 0 for AFBC before v7)
 * surf_stride: out, surface stride (the AFBC surface stride for AFBC images)
 */
static void
panfrost_get_surface_strides(const struct pan_image_layout *layout,
                             unsigned l,
                             int32_t *row_stride, int32_t *surf_stride)
{
        const struct pan_image_slice_layout *slice = &layout->slices[l];

        if (!drm_is_afbc(layout->modifier)) {
                *row_stride = slice->row_stride;
                *surf_stride = slice->surface_stride;
                return;
        }

        /* Pre v7 don't have a row stride field. This field is
         * repurposed as a Y offset which we don't use */
        if (PAN_ARCH < 7)
                *row_stride = 0;
        else
                *row_stride = slice->row_stride;

        *surf_stride = slice->afbc.surface_stride;
}
/* Compute the address of one surface of an image.
 *
 * layout: layout of the underlying image
 * dim:    dimension of the view; a cube view multiplies `w` by 6 faces
 * base:   address the offset is added to
 * l:      mip level
 * w:      layer index; for 3D images it is scaled by the layer stride of
 *         the level instead of going through panfrost_texture_offset()
 * f:      face, must be 0 for 3D images
 * s:      sample, must be 0 for 3D images
 *
 * Note that the 3D check looks at the image's own dimension while the cube
 * multiplier looks at the view's dimension.
 */
static mali_ptr
panfrost_get_surface_pointer(const struct pan_image_layout *layout,
                             enum mali_texture_dimension dim,
                             mali_ptr base,
                             unsigned l, unsigned w, unsigned f, unsigned s)
{
        unsigned offset;

        if (layout->dim != MALI_TEXTURE_DIMENSION_3D) {
                const unsigned faces_per_layer =
                        dim == MALI_TEXTURE_DIMENSION_CUBE ? 6 : 1;
                const unsigned flat_layer = (w * faces_per_layer) + f;

                offset = panfrost_texture_offset(layout, l, flat_layer, s);
        } else {
                assert(!f && !s);
                offset = layout->slices[l].offset +
                         (w * panfrost_get_layer_stride(layout, l));
        }

        return base + offset;
}
#if PAN_ARCH >= 9
#define CLUMP_FMT(pipe, mali) [PIPE_FORMAT_ ## pipe] = MALI_CLUMP_FORMAT_ ## mali
static enum mali_clump_format special_clump_formats[PIPE_FORMAT_COUNT] = {
CLUMP_FMT(X32_S8X24_UINT, X32S8X24),
CLUMP_FMT(X24S8_UINT, X24S8),
CLUMP_FMT(S8X24_UINT, S8X24),
CLUMP_FMT(S8_UINT, S8),
CLUMP_FMT(L4A4_UNORM, L4A4),
CLUMP_FMT(L8A8_UNORM, L8A8),
CLUMP_FMT(L8A8_UINT, L8A8),
CLUMP_FMT(L8A8_SINT, L8A8),
CLUMP_FMT(A8_UNORM, A8),
CLUMP_FMT(A8_UINT, A8),
CLUMP_FMT(A8_SINT, A8),
CLUMP_FMT(ETC1_RGB8, ETC2_RGB8),
CLUMP_FMT(ETC2_RGB8, ETC2_RGB8),
CLUMP_FMT(ETC2_SRGB8, ETC2_RGB8),
CLUMP_FMT(ETC2_RGB8A1, ETC2_RGB8A1),
CLUMP_FMT(ETC2_SRGB8A1, ETC2_RGB8A1),
CLUMP_FMT(ETC2_RGBA8, ETC2_RGBA8),
CLUMP_FMT(ETC2_SRGBA8, ETC2_RGBA8),
CLUMP_FMT(ETC2_R11_UNORM, ETC2_R11_UNORM),
CLUMP_FMT(ETC2_R11_SNORM, ETC2_R11_SNORM),
CLUMP_FMT(ETC2_RG11_UNORM, ETC2_RG11_UNORM),
CLUMP_FMT(ETC2_RG11_SNORM, ETC2_RG11_SNORM),
CLUMP_FMT(DXT1_RGB, BC1_UNORM),
CLUMP_FMT(DXT1_RGBA, BC1_UNORM),
CLUMP_FMT(DXT1_SRGB, BC1_UNORM),
CLUMP_FMT(DXT1_SRGBA, BC1_UNORM),
CLUMP_FMT(DXT3_RGBA, BC2_UNORM),
CLUMP_FMT(DXT3_SRGBA, BC2_UNORM),
CLUMP_FMT(DXT5_RGBA, BC3_UNORM),
CLUMP_FMT(DXT5_SRGBA, BC3_UNORM),
CLUMP_FMT(RGTC1_UNORM, BC4_UNORM),
CLUMP_FMT(RGTC1_SNORM, BC4_SNORM),
CLUMP_FMT(RGTC2_UNORM, BC5_UNORM),
CLUMP_FMT(RGTC2_SNORM, BC5_SNORM),
CLUMP_FMT(BPTC_RGB_FLOAT, BC6H_SF16),
CLUMP_FMT(BPTC_RGB_UFLOAT, BC6H_UF16),
CLUMP_FMT(BPTC_RGBA_UNORM, BC7_UNORM),
CLUMP_FMT(BPTC_SRGBA, BC7_UNORM),
};
#undef CLUMP_FMT
static enum mali_clump_format
panfrost_clump_format(enum pipe_format format)
{
/* First, try a special clump format. Note that the 0 encoding is for a
* raw clump format, which will never be in the special table.
*/
if (special_clump_formats[format])
return special_clump_formats[format];
/* Else, it's a raw format. Raw formats must not be compressed. */
assert(!util_format_is_compressed(format));
/* Select the appropriate raw format. */
switch (util_format_get_blocksize(format)) {
case 1: return MALI_CLUMP_FORMAT_RAW8;
case 2: return MALI_CLUMP_FORMAT_RAW16;
case 3: return MALI_CLUMP_FORMAT_RAW24;
case 4: return MALI_CLUMP_FORMAT_RAW32;
case 6: return MALI_CLUMP_FORMAT_RAW48;
case 8: return MALI_CLUMP_FORMAT_RAW64;
case 12: return MALI_CLUMP_FORMAT_RAW96;
case 16: return MALI_CLUMP_FORMAT_RAW128;
default: unreachable("Invalid bpp");
}
}
/* Translate the superblock size bits of an AFBC modifier into the hardware
 * enum. The modifier must be an AFBC modifier, and any block size other than
 * 16x16, 32x8 or 64x4 is treated as unreachable.
 */
static enum mali_afbc_superblock_size
translate_superblock_size(uint64_t modifier)
{
        assert(drm_is_afbc(modifier));

        const uint64_t block_size = modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK;

        if (block_size == AFBC_FORMAT_MOD_BLOCK_SIZE_16x16)
                return MALI_AFBC_SUPERBLOCK_SIZE_16X16;

        if (block_size == AFBC_FORMAT_MOD_BLOCK_SIZE_32x8)
                return MALI_AFBC_SUPERBLOCK_SIZE_32X8;

        if (block_size == AFBC_FORMAT_MOD_BLOCK_SIZE_64x4)
                return MALI_AFBC_SUPERBLOCK_SIZE_64X4;

        unreachable("Invalid superblock size");
}
/* Pack one PLANE descriptor into `payload`.
 *
 * layout:  layout of the image being described; its own format decides
 *          whether the plane is treated as ASTC
 * format:  format used for the AFBC compression mode and for the clump
 *          format of generic planes
 * pointer: address written into the descriptor
 * level:   mip level the plane describes
 * payload: destination of the packed descriptor
 *
 * The plane type is ASTC (2D or 3D) for ASTC formats, AFBC for AFBC
 * modifiers, and generic otherwise. Clump ordering is only programmed for
 * non-AFBC planes.
 */
static void
panfrost_emit_plane(const struct pan_image_layout *layout,
                    enum pipe_format format,
                    mali_ptr pointer,
                    unsigned level,
                    void *payload)
{
        const struct util_format_description *desc =
                util_format_description(layout->format);
        const bool afbc = drm_is_afbc(layout->modifier);

        int32_t row_stride, surface_stride;

        panfrost_get_surface_strides(layout, level, &row_stride, &surface_stride);
        assert(row_stride >= 0 && surface_stride >= 0 && "negative stride");

        pan_pack(payload, PLANE, cfg) {
                cfg.pointer = pointer;
                cfg.row_stride = row_stride;

                /* Everything from the start of this level to the end of the
                 * image data */
                cfg.size = layout->data_size - layout->slices[level].offset;

                if (layout->nr_samples)
                        cfg.slice_stride = layout->slices[level].surface_stride;
                else
                        cfg.slice_stride = panfrost_get_layer_stride(layout, level);

                if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
                        assert(!afbc);

                        if (desc->block.depth > 1) {
                                cfg.plane_type = MALI_PLANE_TYPE_ASTC_3D;
                                cfg.astc._3d.block_width = panfrost_astc_dim_3d(desc->block.width);
                                cfg.astc._3d.block_height = panfrost_astc_dim_3d(desc->block.height);
                                cfg.astc._3d.block_depth = panfrost_astc_dim_3d(desc->block.depth);
                        } else {
                                cfg.plane_type = MALI_PLANE_TYPE_ASTC_2D;
                                cfg.astc._2d.block_width = panfrost_astc_dim_2d(desc->block.width);
                                cfg.astc._2d.block_height = panfrost_astc_dim_2d(desc->block.height);
                        }

                        /* Mesa does not advertise _HDR formats yet */
                        cfg.astc.decode_hdr = false;

                        /* sRGB formats decode to RGBA8 sRGB, which is narrow.
                         *
                         * Non-sRGB formats decode to RGBA16F which is wide.
                         * With a future extension, we could decode non-sRGB
                         * formats narrowly too, but this isn't wired up in Mesa
                         * yet.
                         */
                        cfg.astc.decode_wide =
                                !(desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
                } else if (afbc) {
                        cfg.plane_type = MALI_PLANE_TYPE_AFBC;
                        cfg.afbc.superblock_size = translate_superblock_size(layout->modifier);
                        cfg.afbc.ytr = (layout->modifier & AFBC_FORMAT_MOD_YTR);
                        cfg.afbc.tiled_header = (layout->modifier & AFBC_FORMAT_MOD_TILED);
                        cfg.afbc.prefetch = true;
                        cfg.afbc.compression_mode = pan_afbc_compression_mode(format);
                        cfg.afbc.header_stride = layout->slices[level].afbc.header_size;
                } else {
                        cfg.plane_type = MALI_PLANE_TYPE_GENERIC;
                        cfg.clump_format = panfrost_clump_format(format);
                }

                /* Clump ordering is left untouched for AFBC planes */
                if (!afbc) {
                        if (layout->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED)
                                cfg.clump_ordering = MALI_CLUMP_ORDERING_TILED_U_INTERLEAVED;
                        else
                                cfg.clump_ordering = MALI_CLUMP_ORDERING_LINEAR;
                }
        }
}
#endif
/* Fill the payload of a texture descriptor with one surface descriptor
 * (SURFACE_WITH_STRIDE before v9, PLANE on v9 and later) per surface
 * selected by the image view.
 *
 * iview:   image view selecting the levels and layers to describe; its image
 *          provides the layout and the backing BO
 * format:  format whose description feeds the compression tag; on v9+ it is
 *          also forwarded to panfrost_emit_plane()
 * payload: CPU destination, advanced by one descriptor per surface.
 *          NOTE(review): presumably sized with
 *          panfrost_estimate_texture_payload_size() -- confirm with callers.
 */
static void
panfrost_emit_texture_payload(const struct pan_image_view *iview,
enum pipe_format format,
void *payload)
{
const struct pan_image_layout *layout = &iview->image->layout;
ASSERTED const struct util_format_description *desc =
util_format_description(format);
/* GPU address of the image data: BO address plus the image's offset */
mali_ptr base = iview->image->data.bo->ptr.gpu + iview->image->data.offset;
/* Views with a non-zero buf.size must be 1D and start buf.offset past
 * the image base */
if (iview->buf.size) {
assert (iview->dim == MALI_TEXTURE_DIMENSION_1D);
base += iview->buf.offset;
}
/* panfrost_compression_tag() wants the dimension of the resource, not the
* one of the image view (those might differ).
*/
base |= panfrost_compression_tag(desc, layout->dim, layout->modifier);
/* v4 does not support compression */
assert(PAN_ARCH >= 5 || !drm_is_afbc(layout->modifier));
assert(PAN_ARCH >= 5 || desc->layout != UTIL_FORMAT_LAYOUT_ASTC);
/* Inject the addresses in, interleaving array indices, mip levels,
* cube faces, and strides in that order. On Bifrost and older, each
* sample had its own surface descriptor; on Valhall, they are fused
* into a single plane descriptor.
*/
unsigned first_layer = iview->first_layer, last_layer = iview->last_layer;
/* Up to v7 every sample is iterated; later versions iterate one sample */
unsigned nr_samples = PAN_ARCH <= 7 ? layout->nr_samples : 1;
unsigned first_face = 0, last_face = 0;
/* Cube views: split the flat layer range into faces and layers */
if (iview->dim == MALI_TEXTURE_DIMENSION_CUBE) {
panfrost_adjust_cube_dimensions(&first_face, &last_face,
&first_layer, &last_layer);
}
struct panfrost_surface_iter iter;
/* Visit every surface in the order defined by
 * panfrost_surface_iter_next() */
for (panfrost_surface_iter_begin(&iter, first_layer, last_layer,
iview->first_level, iview->last_level,
first_face, last_face, nr_samples);
!panfrost_surface_iter_end(&iter);
panfrost_surface_iter_next(&iter)) {
/* The compression tag OR'ed into base is carried into every pointer */
mali_ptr pointer =
panfrost_get_surface_pointer(layout, iview->dim, base,
iter.level, iter.layer,
iter.face, iter.sample);
#if PAN_ARCH >= 9
panfrost_emit_plane(layout, format, pointer, iter.level, payload);
payload += pan_size(PLANE);
#else
pan_pack(payload, SURFACE_WITH_STRIDE, cfg) {
cfg.pointer = pointer;
panfrost_get_surface_strides(layout, iter.level,
&cfg.row_stride,
&cfg.surface_stride);
}
payload += pan_size(SURFACE_WITH_STRIDE);
#endif
}
}
#if PAN_ARCH <= 7
/* Map a DRM format modifier to the mali_texture_layout packed in a texture
 * descriptor: AFBC modifiers, the 16x16 U-interleaved tiling and linear are
 * the only supported cases; anything else is treated as unreachable.
 */
static enum mali_texture_layout
panfrost_modifier_to_layout(uint64_t modifier)
{
        if (drm_is_afbc(modifier))
                return MALI_TEXTURE_LAYOUT_AFBC;

        if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED)
                return MALI_TEXTURE_LAYOUT_TILED;

        if (modifier == DRM_FORMAT_MOD_LINEAR)
                return MALI_TEXTURE_LAYOUT_LINEAR;

        unreachable("Invalid modifer");
}
#endif
/*
 * Generates a texture descriptor. Ideally, descriptors are immutable after the
 * texture is created, so we can keep these hanging around in GPU memory in a
 * dedicated BO and not have to worry. In practice there are some minor gotchas
 * with this (the driver sometimes will change the format of a texture on the
 * fly for compression) but it's fast enough to just regenerate the descriptor
 * in those cases, rather than monkeypatching at drawtime. A texture descriptor
 * consists of a 32-byte header followed by pointers.
 *
 * dev:     device, used to look up the hardware format for the view format
 * iview:   image view to describe
 * out:     destination of the packed TEXTURE descriptor
 * payload: memory for the surface/plane descriptors; they are written
 *          through payload->cpu, and on v6+ payload->gpu is stored in the
 *          descriptor's surfaces field
 */
void
GENX(panfrost_new_texture)(const struct panfrost_device *dev,
const struct pan_image_view *iview,
void *out, const struct panfrost_ptr *payload)
{
const struct pan_image_layout *layout = &iview->image->layout;
enum pipe_format format = iview->format;
unsigned swizzle;
if (PAN_ARCH >= 7 && util_format_is_depth_or_stencil(format)) {
/* v7+ doesn't have an _RRRR component order, combine the
* user swizzle with a .XXXX swizzle to emulate that.
*/
static const unsigned char replicate_x[4] = {
PIPE_SWIZZLE_X, PIPE_SWIZZLE_X,
PIPE_SWIZZLE_X, PIPE_SWIZZLE_X,
};
unsigned char patched_swizzle[4];
util_format_compose_swizzles(replicate_x,
iview->swizzle,
patched_swizzle);
swizzle = panfrost_translate_swizzle_4(patched_swizzle);
} else {
swizzle = panfrost_translate_swizzle_4(iview->swizzle);
}
/* Write the surface/plane descriptors before packing the header */
panfrost_emit_texture_payload(iview, format, payload->cpu);
unsigned array_size = iview->last_layer - iview->first_layer + 1;
/* Cube views must cover whole cubes; the array size counts cubes, not
 * faces */
if (iview->dim == MALI_TEXTURE_DIMENSION_CUBE) {
assert(iview->first_layer % 6 == 0);
assert(iview->last_layer % 6 == 5);
array_size /= 6;
}
unsigned width;
/* Views with a non-zero buf.size take their width from buf.size and
 * must be single-level, single-layer, single-sampled 1D views that fit
 * within the image width; other views use the minified image width */
if (iview->buf.size) {
assert(iview->dim == MALI_TEXTURE_DIMENSION_1D);
assert(!iview->first_level && !iview->last_level);
assert(!iview->first_layer && !iview->last_layer);
assert(layout->nr_samples == 1);
assert(layout->height == 1 && layout->depth == 1);
assert(iview->buf.offset + iview->buf.size <= layout->width);
width = iview->buf.size;
} else {
width = u_minify(layout->width, iview->first_level);
}
pan_pack(out, TEXTURE, cfg) {
cfg.dimension = iview->dim;
cfg.format = dev->formats[format].hw;
cfg.width = width;
/* Height and depth are those of the first level in the view */
cfg.height = u_minify(layout->height, iview->first_level);
/* 3D views program the depth; every other dimension programs the
 * sample count instead */
if (iview->dim == MALI_TEXTURE_DIMENSION_3D)
cfg.depth = u_minify(layout->depth, iview->first_level);
else
cfg.sample_count = layout->nr_samples;
cfg.swizzle = swizzle;
#if PAN_ARCH >= 9
/* Interleaved for any non-linear modifier and for compressed formats */
cfg.texel_interleave =
(layout->modifier != DRM_FORMAT_MOD_LINEAR) ||
util_format_is_compressed(format);
#else
cfg.texel_ordering =
panfrost_modifier_to_layout(layout->modifier);
#endif
cfg.levels = iview->last_level - iview->first_level + 1;
cfg.array_size = array_size;
#if PAN_ARCH >= 6
cfg.surfaces = payload->gpu;
/* We specify API-level LOD clamps in the sampler descriptor
* and use these clamps simply for bounds checking */
cfg.minimum_lod = FIXED_16(0, false);
/* Reads cfg.levels, so this must stay after the assignment above */
cfg.maximum_lod = FIXED_16(cfg.levels - 1, false);
#else
/* Before v6 no surfaces pointer is programmed here.
 * NOTE(review): presumably the payload directly follows the
 * descriptor on these versions -- confirm with callers. */
cfg.manual_stride = true;
#endif
}
}