panfrost: Rewrite texture descriptor creation logic

Rather than creating partially within the Gallium create function and
monkeypatching on draw time with code split across N different files
with tight Gallium dependencies, let's streamline everything into a
series of maintainable routines in mesa/src/panfrost with no Gallium
dependencies, doing the entire texture creation in one-shot and thus
adding absolutely zero draw-time overhead (since we can allocate a BO
for the descriptor and upload ahead-of-time, so switching textures is as
cheap as switching pointers).

Was this worth it? You know, I'm not sure :|

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3858>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3858>
This commit is contained in:
Alyssa Rosenzweig 2020-02-18 14:20:16 -05:00
parent ad44f587a8
commit b929565ea8
5 changed files with 303 additions and 165 deletions

View File

@ -475,95 +475,19 @@ panfrost_upload_tex(
struct pipe_sampler_view *pview = &view->base;
struct panfrost_resource *rsrc = pan_resource(pview->texture);
mali_ptr descriptor_gpu;
void *descriptor;
/* Do we interleave an explicit stride with every element? */
bool has_manual_stride = view->manual_stride;
/* For easy access */
bool is_buffer = pview->target == PIPE_BUFFER;
unsigned first_level = is_buffer ? 0 : pview->u.tex.first_level;
unsigned last_level = is_buffer ? 0 : pview->u.tex.last_level;
unsigned first_layer = is_buffer ? 0 : pview->u.tex.first_layer;
unsigned last_layer = is_buffer ? 0 : pview->u.tex.last_layer;
unsigned first_face = 0;
unsigned last_face = 0;
unsigned face_mult = 1;
/* Cubemaps have 6 faces as layers in between each actual layer.
* There's a bit of an impedance mismatch between Gallium and the
* hardware, let's fixup for it */
if (pview->target == PIPE_TEXTURE_CUBE || pview->target == PIPE_TEXTURE_CUBE_ARRAY) {
/* TODO: logic wrong in the asserted out cases ... can they happen? */
first_face = first_layer % 6;
last_face = last_layer % 6;
first_layer /= 6;
last_layer /= 6;
assert((first_layer == last_layer) || (first_face == 0 && last_face == 5));
face_mult = 6;
}
/* Lower-bit is set when sampling from colour AFBC */
bool is_afbc = rsrc->layout == MALI_TEXTURE_AFBC;
bool is_zs = rsrc->base.bind & PIPE_BIND_DEPTH_STENCIL;
unsigned afbc_bit = (is_afbc && !is_zs) ? 1 : 0;
/* Add the BO to the job so it's retained until the job is done. */
struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
panfrost_batch_add_bo(batch, rsrc->bo,
PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
panfrost_bo_access_for_stage(st));
/* Add the usage flags in, since they can change across the CSO
* lifetime due to layout switches */
panfrost_batch_add_bo(batch, view->bo,
PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
panfrost_bo_access_for_stage(st));
view->hw.format.layout = rsrc->layout;
view->hw.format.manual_stride = has_manual_stride;
/* Inject the addresses in, interleaving array indices, mip levels,
* cube faces, and strides in that order */
unsigned idx = 0;
unsigned levels = 1 + last_level - first_level;
unsigned layers = 1 + last_layer - first_layer;
unsigned faces = 1 + last_face - first_face;
unsigned num_elements = levels * layers * faces;
if (has_manual_stride)
num_elements *= 2;
descriptor = malloc(sizeof(struct mali_texture_descriptor) +
sizeof(mali_ptr) * num_elements);
memcpy(descriptor, &view->hw, sizeof(struct mali_texture_descriptor));
mali_ptr *pointers_and_strides = descriptor +
sizeof(struct mali_texture_descriptor);
for (unsigned w = first_layer; w <= last_layer; ++w) {
for (unsigned l = first_level; l <= last_level; ++l) {
for (unsigned f = first_face; f <= last_face; ++f) {
pointers_and_strides[idx++] =
panfrost_get_texture_address(rsrc, l, w * face_mult + f)
+ afbc_bit + view->astc_stretch;
if (has_manual_stride) {
pointers_and_strides[idx++] =
rsrc->slices[l].stride;
}
}
}
}
descriptor_gpu = panfrost_upload_transient(batch, descriptor,
sizeof(struct mali_texture_descriptor) +
num_elements * sizeof(*pointers_and_strides));
free(descriptor);
return descriptor_gpu;
return view->bo->gpu;
}
static void
@ -2052,29 +1976,14 @@ panfrost_translate_texture_type(enum pipe_texture_target t) {
}
}
/* Encodes the block dimensions of an ASTC format as a single value:
 * 8 * (height - 4) + (width - 4), where a dimension of 12 is encoded as if it
 * were 11. Both dimensions are asserted to lie in [4, 12]. */
static uint8_t
panfrost_compute_astc_stretch(
        const struct util_format_description *desc)
{
        unsigned width = desc->block.width;
        unsigned height = desc->block.height;

        assert(width >= 4 && width <= 12);
        assert(height >= 4 && height <= 12);

        /* 12 is encoded the same way as 11 */
        width = (width == 12) ? 11 : width;
        height = (height == 12) ? 11 : height;

        unsigned encoded_height = 8 * (height - 4);
        unsigned encoded_width = width - 4;

        return encoded_height + encoded_width;
}
static struct pipe_sampler_view *
panfrost_create_sampler_view(
struct pipe_context *pctx,
struct pipe_resource *texture,
const struct pipe_sampler_view *template)
{
struct panfrost_screen *screen = pan_screen(pctx->screen);
struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view);
int bytes_per_pixel = util_format_get_blocksize(texture->format);
pipe_reference(NULL, &texture->reference);
@ -2086,12 +1995,6 @@ panfrost_create_sampler_view(
so->base.reference.count = 1;
so->base.context = pctx;
/* sampler_views correspond to texture descriptors, minus the texture
* (data) itself. So, we serialise the descriptor here and cache it for
* later. */
const struct util_format_description *desc = util_format_description(prsrc->base.format);
unsigned char user_swizzle[4] = {
template->swizzle_r,
template->swizzle_g,
@ -2099,32 +2002,6 @@ panfrost_create_sampler_view(
template->swizzle_a
};
enum mali_format format = panfrost_find_format(desc);
if (format == MALI_ASTC_HDR_SUPP || format == MALI_ASTC_SRGB_SUPP)
so->astc_stretch = panfrost_compute_astc_stretch(desc);
/* Check if we need to set a custom stride by computing the "expected"
* stride and comparing it to what the BO actually wants. Only applies
* to linear textures, since tiled/compressed textures have strict
* alignment requirements for their strides as it is */
unsigned first_level = template->u.tex.first_level;
unsigned last_level = template->u.tex.last_level;
if (prsrc->layout == MALI_TEXTURE_LINEAR) {
for (unsigned l = first_level; l <= last_level; ++l) {
unsigned actual_stride = prsrc->slices[l].stride;
unsigned width = u_minify(texture->width0, l);
unsigned comp_stride = width * bytes_per_pixel;
if (comp_stride != actual_stride) {
so->manual_stride = true;
break;
}
}
}
/* In the hardware, array_size refers specifically to array textures,
* whereas in Gallium, it also covers cubemaps */
@ -2136,26 +2013,32 @@ panfrost_create_sampler_view(
array_size /= 6;
}
struct mali_texture_descriptor texture_descriptor = {
.width = MALI_POSITIVE(u_minify(texture->width0, first_level)),
.height = MALI_POSITIVE(u_minify(texture->height0, first_level)),
.depth = MALI_POSITIVE(u_minify(texture->depth0, first_level)),
.array_size = MALI_POSITIVE(array_size),
enum mali_texture_type type =
panfrost_translate_texture_type(template->target);
.format = {
.swizzle = panfrost_translate_swizzle_4(desc->swizzle),
.format = format,
.srgb = desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB,
.type = panfrost_translate_texture_type(template->target),
.unknown2 = 0x1,
},
unsigned size = panfrost_estimate_texture_size(
template->u.tex.first_level,
template->u.tex.last_level,
template->u.tex.first_layer,
template->u.tex.last_layer,
type, prsrc->layout);
.swizzle = panfrost_translate_swizzle_4(user_swizzle)
};
so->bo = panfrost_bo_create(screen, size, 0);
texture_descriptor.levels = last_level - first_level;
so->hw = texture_descriptor;
panfrost_new_texture(
so->bo->cpu,
texture->width0, texture->height0,
texture->depth0, array_size,
texture->format,
type, prsrc->layout,
template->u.tex.first_level,
template->u.tex.last_level,
template->u.tex.first_layer,
template->u.tex.last_layer,
prsrc->cubemap_stride,
panfrost_translate_swizzle_4(user_swizzle),
prsrc->bo->gpu,
prsrc->slices);
return (struct pipe_sampler_view *) so;
}
@ -2190,9 +2073,12 @@ panfrost_set_sampler_views(
static void
panfrost_sampler_view_destroy(
struct pipe_context *pctx,
struct pipe_sampler_view *view)
struct pipe_sampler_view *pview)
{
pipe_resource_reference(&view->texture, NULL);
struct panfrost_sampler_view *view = (struct panfrost_sampler_view *) pview;
pipe_resource_reference(&pview->texture, NULL);
panfrost_bo_unreference(view->bo);
ralloc_free(view);
}

View File

@ -262,9 +262,7 @@ struct panfrost_sampler_state {
/* Driver-side sampler view. The Gallium pipe_sampler_view is the first member,
 * and the driver casts pipe_sampler_view pointers to and from this struct. */
struct panfrost_sampler_view {
/* Gallium base object (texture reference, context, view parameters) */
struct pipe_sampler_view base;
/* Fixed-size descriptor header; panfrost_upload_tex copies it to the start
 * of the descriptor it builds */
struct mali_texture_descriptor hw;
/* ASTC stretch value that panfrost_upload_tex adds to every texture address */
uint8_t astc_stretch;
/* Whether panfrost_upload_tex interleaves a stride after every address */
bool manual_stride;
/* BO holding the texture descriptor: created in panfrost_create_sampler_view,
 * added to the batch and returned by GPU address in panfrost_upload_tex, and
 * unreferenced in panfrost_sampler_view_destroy */
struct panfrost_bo *bo;
};
static inline struct panfrost_context *

View File

@ -533,15 +533,6 @@ panfrost_resource_destroy(struct pipe_screen *screen,
ralloc_free(rsrc);
}
/* Stride between consecutive layers/faces of a mip level. 3D resources use the
 * per-level size0; every other target uses the resource's cubemap stride. */
static unsigned
panfrost_get_layer_stride(struct panfrost_resource *rsrc, unsigned level)
{
        bool is_3d = (rsrc->base.target == PIPE_TEXTURE_3D);

        return is_3d ? rsrc->slices[level].size0 : rsrc->cubemap_stride;
}
static void *
panfrost_transfer_map(struct pipe_context *pctx,
struct pipe_resource *resource,
@ -645,7 +636,9 @@ panfrost_transfer_map(struct pipe_context *pctx,
return transfer->map;
} else {
transfer->base.stride = rsrc->slices[level].stride;
transfer->base.layer_stride = panfrost_get_layer_stride(rsrc, level);
transfer->base.layer_stride = panfrost_get_layer_stride(
rsrc->slices, rsrc->base.target == PIPE_TEXTURE_3D,
rsrc->cubemap_stride, level);
/* By mapping direct-write, we're implicitly already
* initialized (maybe), so be conservative */
@ -792,10 +785,8 @@ panfrost_get_texture_address(
struct panfrost_resource *rsrc,
unsigned level, unsigned face)
{
unsigned level_offset = rsrc->slices[level].offset;
unsigned face_offset = face * panfrost_get_layer_stride(rsrc, level);
return rsrc->bo->gpu + level_offset + face_offset;
bool is_3d = rsrc->base.target == PIPE_TEXTURE_3D;
return rsrc->bo->gpu + panfrost_texture_offset(rsrc->slices, is_3d, rsrc->cubemap_stride, level, face);
}
/* Given a resource that has already been allocated, hint that it should use a
@ -856,6 +847,8 @@ panfrost_resource_hint_layout(
panfrost_bo_unreference(rsrc->bo);
rsrc->bo = panfrost_bo_create(screen, new_size, PAN_BO_DELAY_MMAP);
}
/* TODO: If there are textures bound, regenerate their descriptors */
}
static void

View File

@ -26,8 +26,222 @@
*/
#include "util/macros.h"
#include "util/u_math.h"
#include "pan_texture.h"
/* Generates a texture descriptor. Ideally, descriptors are immutable after the
* texture is created, so we can keep these hanging around in GPU memory in a
* dedicated BO and not have to worry. In practice there are some minor gotchas
* with this (the driver sometimes will change the format of a texture on the
* fly for compression) but it's fast enough to just regenerate the descriptor
* in those cases, rather than monkeypatching at drawtime.
*
* A texture descriptor consists of a 32-byte mali_texture_descriptor structure
* followed by a variable number of pointers. Due to this variance and
* potentially large size, we actually upload directly rather than returning
* the descriptor. Whether the user does a copy themselves or not is irrelevant
* to us here.
*/
/* Decides whether strides have to be written out explicitly. For every mip
 * level in [first_level, last_level], the tightly-packed stride
 * (minified width * bytes_per_pixel) is compared with the stride recorded in
 * the slice; the first mismatch makes the answer true. The caller only asks
 * for linear textures, since tiled/compressed textures already have strict
 * alignment requirements for their strides. */

static bool
panfrost_needs_explicit_stride(
        struct panfrost_slice *slices,
        uint16_t width,
        unsigned first_level, unsigned last_level,
        unsigned bytes_per_pixel)
{
        bool mismatch = false;

        for (unsigned level = first_level;
             !mismatch && level <= last_level; ++level) {
                unsigned tight = u_minify(width, level) * bytes_per_pixel;

                mismatch = (slices[level].stride != tight);
        }

        return mismatch;
}
/* Adaptive Scalable Texture Compression (ASTC) corresponds to just a few
 * texture types in the hardware, but can in fact be parametrized with various
 * block widths and heights, the so-called "stretch factor". These parameters
 * are stuffed into the bottom bits of the payload pointers. This function
 * computes the stuffing constant for a single dimension: the dimension minus
 * 4, with 12 encoded the same way as 11. Each constant is 3 bits wide, and the
 * constants of the active dimensions are stored side-by-side.
 */

static unsigned
panfrost_astc_stretch(unsigned dim)
{
        assert(dim >= 4 && dim <= 12);

        /* Clamp first so that 12 lands on the encoding of 11 */
        unsigned clamped = MIN2(dim, 11);

        return clamped - 4;
}
/* Computes the tag OR'd into texture addresses. AFBC takes priority: the tag
 * is 1 unless the format has a depth component, in which case it is 0.
 * Otherwise, for the two ASTC hardware formats, the tag packs the height
 * stretch above the 3-bit width stretch. Everything else is untagged. */

static unsigned
panfrost_compression_tag(
        const struct util_format_description *desc,
        enum mali_format format, enum mali_texture_layout layout)
{
        if (layout == MALI_TEXTURE_AFBC) {
                /* Low bit is only set for AFBC formats without depth */
                return util_format_has_depth(desc) ? 0x0 : 0x1;
        }

        bool is_astc = (format == MALI_ASTC_HDR_SUPP) ||
                       (format == MALI_ASTC_SRGB_SUPP);

        if (!is_astc)
                return 0;

        unsigned stretch_y = panfrost_astc_stretch(desc->block.height);
        unsigned stretch_x = panfrost_astc_stretch(desc->block.width);

        return (stretch_y << 3) | stretch_x;
}
/* Cubemaps expose their 6 faces as "layers" in between each actual layer.
 * Split an incoming layer range into a face range (layer index modulo 6) and
 * a cube range (layer index divided by 6), updating the layer bounds in place.
 * The callers in this file pass four distinct variables. TODO: logic wrong in
 * the asserted out cases ... can they happen, perhaps from cubemap arrays? */

static void
panfrost_adjust_cube_dimensions(
        unsigned *first_face, unsigned *last_face,
        unsigned *first_layer, unsigned *last_layer)
{
        const unsigned faces_per_cube = 6;

        unsigned lo = *first_layer;
        unsigned hi = *last_layer;

        *first_face = lo % faces_per_cube;
        *last_face = hi % faces_per_cube;

        *first_layer = lo / faces_per_cube;
        *last_layer = hi / faces_per_cube;

        /* Either a single cube, or a range made of whole cubes only */
        assert((*first_layer == *last_layer) || (*first_face == 0 && *last_face == 5));
}
/* Counts the payload entries that follow the texture descriptor: one per
 * (layer, level, face) combination, doubled when a stride is interleaved
 * after every address. For cubes the layer range is first split into faces
 * and cubes. */

static unsigned
panfrost_texture_num_elements(
        unsigned first_level, unsigned last_level,
        unsigned first_layer, unsigned last_layer,
        bool is_cube, bool manual_stride)
{
        unsigned first_face = 0, last_face = 0;

        if (is_cube) {
                panfrost_adjust_cube_dimensions(&first_face, &last_face,
                                &first_layer, &last_layer);
        }

        unsigned level_count = 1 + last_level - first_level;
        unsigned layer_count = 1 + last_layer - first_layer;
        unsigned face_count = 1 + last_face - first_face;

        unsigned entries = level_count * layer_count * face_count;

        return manual_stride ? (entries * 2) : entries;
}
/* Upper bound, computed ahead of time, on the size of a texture descriptor
 * (fixed header plus pointer payload). Linear layouts are assumed to need an
 * explicit stride per address, so the result is exact in the average case and
 * at worst 2x too large (linear texture that turns out not to need manual
 * strides). The result is never smaller than the real size, so it is safe to
 * use as an allocation amount. */

unsigned
panfrost_estimate_texture_size(
        unsigned first_level, unsigned last_level,
        unsigned first_layer, unsigned last_layer,
        enum mali_texture_type type, enum mali_texture_layout layout)
{
        /* Worst case: every linear texture carries explicit strides */
        bool assume_stride = (layout == MALI_TEXTURE_LINEAR);
        bool is_cube = (type == MALI_TEX_CUBE);

        unsigned payload_entries = panfrost_texture_num_elements(
                        first_level, last_level,
                        first_layer, last_layer,
                        is_cube, assume_stride);

        return sizeof(struct mali_texture_descriptor) +
                (sizeof(mali_ptr) * payload_entries);
}
/* Writes a complete texture descriptor to `out`: a mali_texture_descriptor
 * header immediately followed by a payload of mali_ptr entries (one address
 * per layer/level/face, each optionally followed by that level's stride).
 *
 * out:           destination buffer; must be large enough for the header plus
 *                payload (see panfrost_estimate_texture_size)
 * width/height/depth: base dimensions, minified by first_level for the header
 * array_size:    written to the header as-is (MALI_POSITIVE encoded)
 * format:        Gallium format; selects the hardware format, the format
 *                swizzle, the sRGB flag and the compression tag
 * type, layout:  hardware texture type and memory layout
 * first/last_level, first/last_layer: inclusive ranges covered by the
 *                descriptor; for MALI_TEX_CUBE the layer range is split into
 *                faces and cubes
 * cube_stride:   stride between layers/faces for non-3D textures
 * swizzle:       already-translated user swizzle stored in the header
 * base:          GPU address of the texture data; the compression tag is OR'd
 *                into it before the per-element offsets are added
 * slices:        per-level offset/stride/size information
 */
void
panfrost_new_texture(
        void *out,
        uint16_t width, uint16_t height,
        uint16_t depth, uint16_t array_size,
        enum pipe_format format,
        enum mali_texture_type type,
        enum mali_texture_layout layout,
        unsigned first_level, unsigned last_level,
        unsigned first_layer, unsigned last_layer,
        unsigned cube_stride,
        unsigned swizzle,
        mali_ptr base,
        struct panfrost_slice *slices)
{
        const struct util_format_description *desc =
                util_format_description(format);

        unsigned bytes_per_pixel = util_format_get_blocksize(format);

        enum mali_format mali_format = panfrost_find_format(desc);

        /* Only linear textures can deviate from the tightly-packed stride */
        bool manual_stride = (layout == MALI_TEXTURE_LINEAR)
                && panfrost_needs_explicit_stride(slices, width,
                                first_level, last_level, bytes_per_pixel);

        struct mali_texture_descriptor descriptor = {
                .width = MALI_POSITIVE(u_minify(width, first_level)),
                .height = MALI_POSITIVE(u_minify(height, first_level)),
                .depth = MALI_POSITIVE(u_minify(depth, first_level)),
                .array_size = MALI_POSITIVE(array_size),
                .format = {
                        .swizzle = panfrost_translate_swizzle_4(desc->swizzle),
                        .format = mali_format,
                        .srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB),
                        .type = type,
                        .layout = layout,
                        .manual_stride = manual_stride,
                        .unknown2 = 1,
                },
                .levels = last_level - first_level,
                .swizzle = swizzle
        };

        memcpy(out, &descriptor, sizeof(descriptor));

        /* AFBC/ASTC information lives in the low bits of every address */
        base |= panfrost_compression_tag(desc, mali_format, layout);

        /* Inject the addresses in, interleaving array indices, mip levels,
         * cube faces, and strides in that order */

        unsigned first_face = 0, last_face = 0, face_mult = 1;

        if (type == MALI_TEX_CUBE) {
                face_mult = 6;
                panfrost_adjust_cube_dimensions(&first_face, &last_face, &first_layer, &last_layer);
        }

        /* The payload starts right after the header. Step through a byte
         * pointer: arithmetic directly on a void * is a GNU extension and not
         * valid ISO C. */
        mali_ptr *payload = (mali_ptr *)
                ((uint8_t *) out + sizeof(struct mali_texture_descriptor));

        unsigned idx = 0;

        for (unsigned w = first_layer; w <= last_layer; ++w) {
                for (unsigned l = first_level; l <= last_level; ++l) {
                        for (unsigned f = first_face; f <= last_face; ++f) {
                                payload[idx++] = base + panfrost_texture_offset(
                                                slices, type == MALI_TEX_3D,
                                                cube_stride, l, w * face_mult + f);

                                if (manual_stride)
                                        payload[idx++] = slices[l].stride;
                        }
                }
        }
}
/* Computes sizes for checksumming, which is 8 bytes per 16x16 tile.
* Checksumming is believed to be a CRC variant (CRC64 based on the size?).
* This feature is also known as "transaction elimination". */
@ -52,3 +266,19 @@ panfrost_compute_checksum_size(
return slice->checksum_stride * tile_count_y;
}
/* Stride between consecutive layers/faces at a given mip level: 3D textures
 * use that level's size0, everything else the supplied cube stride. */
unsigned
panfrost_get_layer_stride(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level)
{
        if (is_3d)
                return slices[level].size0;

        return cube_stride;
}
/* Offset of a particular level/face within a texture: the level's own offset
 * plus `face` layer strides. Add the result to the texture's base address to
 * get the address of that level/face. */

unsigned
panfrost_texture_offset(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level, unsigned face)
{
        unsigned level_base = slices[level].offset;
        unsigned per_face = panfrost_get_layer_stride(slices, is_3d, cube_stride, level);

        return level_base + (face * per_face);
}

View File

@ -30,6 +30,7 @@
#include <stdbool.h>
#include "util/format/u_format.h"
#include "panfrost-job.h"
struct panfrost_slice {
unsigned offset;
@ -63,6 +64,36 @@ panfrost_format_supports_afbc(enum pipe_format format);
unsigned
panfrost_afbc_header_size(unsigned width, unsigned height);
/* mali_texture_descriptor */
/* Returns a conservative size for a texture descriptor covering the given
 * inclusive level/layer ranges: sizeof(struct mali_texture_descriptor) plus
 * one mali_ptr per payload entry, assuming explicit strides for linear
 * layouts. Intended as an allocation amount. */
unsigned
panfrost_estimate_texture_size(
unsigned first_level, unsigned last_level,
unsigned first_layer, unsigned last_layer,
enum mali_texture_type type, enum mali_texture_layout layout);
/* Writes a texture descriptor (header followed by the address/stride payload)
 * to `out`. `base` is the GPU address of the texture data and `slices` holds
 * the per-level layout; level and layer ranges are inclusive. */
void
panfrost_new_texture(
void *out,
uint16_t width, uint16_t height,
uint16_t depth, uint16_t array_size,
enum pipe_format format,
enum mali_texture_type type,
enum mali_texture_layout layout,
unsigned first_level, unsigned last_level,
unsigned first_layer, unsigned last_layer,
unsigned cube_stride,
unsigned swizzle,
mali_ptr base,
struct panfrost_slice *slices);
/* Returns slices[level].size0 when is_3d is set, cube_stride otherwise. */
unsigned
panfrost_get_layer_stride(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level);
/* Returns slices[level].offset plus `face` times the layer stride of that
 * level; add it to a texture's base address to address that level/face. */
unsigned
panfrost_texture_offset(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level, unsigned face);
/* Formats */
enum mali_format