panfrost: Preliminary work for mipmaps

This patch refactors a substantial amount of code in preparation for
mipmaps. In particular, we now have a correct slice abstraction based
on offsets; cpu/gpu are no longer arbitrary pointers. We additionally
shuffle around other code to accompany these changes and clean up how
tiled textures are handled, while drawing some attention to the blit
code.

Mipmaps are still disabled at this point, as autogeneration is not yet
implemented; enabling as-is would cause regressions.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
Alyssa Rosenzweig 2019-03-21 02:54:38 +00:00
parent 04a72391f3
commit 6170814c42
8 changed files with 165 additions and 209 deletions

View File

@ -85,8 +85,8 @@ panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsr
/* Compressed textured reads use a tagged pointer to the metadata */
rsrc->bo->gpu[0] = rsrc->bo->afbc_slab.gpu | (ds ? 0 : 1);
rsrc->bo->cpu[0] = rsrc->bo->afbc_slab.cpu;
rsrc->bo->gpu = rsrc->bo->afbc_slab.gpu | (ds ? 0 : 1);
rsrc->bo->cpu = rsrc->bo->afbc_slab.cpu;
}
static void
@ -772,10 +772,10 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx)
* rsrc->gpu. However, attribute buffers must be 64 aligned. If
* it is not, for now we have to duplicate the buffer. */
mali_ptr effective_address = (rsrc->bo->gpu[0] + buf->buffer_offset);
mali_ptr effective_address = (rsrc->bo->gpu + buf->buffer_offset);
if (effective_address & 0x3F) {
attrs[i].elements = panfrost_upload_transient(ctx, rsrc->bo->cpu[0] + buf->buffer_offset, attrs[i].size) | 1;
attrs[i].elements = panfrost_upload_transient(ctx, rsrc->bo->cpu + buf->buffer_offset, attrs[i].size) | 1;
} else {
attrs[i].elements = effective_address | 1;
}
@ -1018,31 +1018,12 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
struct panfrost_resource *rsrc = (struct panfrost_resource *) tex_rsrc;
/* Inject the address in. */
for (int l = 0; l < (tex_rsrc->last_level + 1); ++l)
ctx->sampler_views[t][i]->hw.swizzled_bitmaps[l] = rsrc->bo->gpu[l];
/* Workaround maybe-errata (?) with non-mipmaps */
int s = ctx->sampler_views[t][i]->hw.nr_mipmap_levels;
if (!rsrc->bo->is_mipmap) {
if (ctx->is_t6xx) {
/* HW ERRATA, not needed after t6XX */
ctx->sampler_views[t][i]->hw.swizzled_bitmaps[1] = rsrc->bo->gpu[0];
ctx->sampler_views[t][i]->hw.unknown3A = 1;
}
ctx->sampler_views[t][i]->hw.nr_mipmap_levels = 0;
for (int l = 0; l <= tex_rsrc->last_level; ++l) {
ctx->sampler_views[t][i]->hw.swizzled_bitmaps[l] =
rsrc->bo->gpu + rsrc->bo->slices[l].offset;
}
trampolines[i] = panfrost_upload_transient(ctx, &ctx->sampler_views[t][i]->hw, sizeof(struct mali_texture_descriptor));
/* Restore */
ctx->sampler_views[t][i]->hw.nr_mipmap_levels = s;
if (ctx->is_t6xx) {
ctx->sampler_views[t][i]->hw.unknown3A = 0;
}
}
mali_ptr trampoline = panfrost_upload_transient(ctx, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]);
@ -1381,7 +1362,7 @@ panfrost_get_index_buffer_raw(const struct pipe_draw_info *info)
return (const uint8_t *) info->index.user;
} else {
struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource);
return (const uint8_t *) rsrc->bo->cpu[0];
return (const uint8_t *) rsrc->bo->cpu;
}
}
@ -1397,7 +1378,7 @@ panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe
if (!info->has_user_indices) {
/* Only resources can be directly mapped */
return rsrc->bo->gpu[0] + offset;
return rsrc->bo->gpu + offset;
} else {
/* Otherwise, we need to upload to transient memory */
const uint8_t *ibuf8 = panfrost_get_index_buffer_raw(info);
@ -1681,8 +1662,8 @@ panfrost_create_sampler_state(
cso->border_color.f[2],
cso->border_color.f[3]
},
.min_lod = FIXED_16(0.0),
.max_lod = FIXED_16(31.0),
.min_lod = FIXED_16(cso->min_lod),
.max_lod = FIXED_16(cso->max_lod),
.unknown2 = 1,
};
@ -1875,7 +1856,7 @@ panfrost_set_constant_buffer(
struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer);
if (rsrc) {
cpu = rsrc->bo->cpu[0];
cpu = rsrc->bo->cpu;
} else if (buf->user_buffer) {
cpu = buf->user_buffer;
} else {
@ -1982,7 +1963,11 @@ panfrost_create_sampler_view(
/* TODO: Other base levels require adjusting dimensions / level numbers / etc */
assert (template->u.tex.first_level == 0);
texture_descriptor.nr_mipmap_levels = template->u.tex.last_level - template->u.tex.first_level;
/* Disable mipmapping for now to avoid regressions while automipmapping
* is being implemented. TODO: Remove me once automipmaps work */
//texture_descriptor.nr_mipmap_levels = template->u.tex.last_level - template->u.tex.first_level;
texture_descriptor.nr_mipmap_levels = 0;
so->hw = texture_descriptor;

View File

@ -136,7 +136,7 @@ panfrost_drm_import_bo(struct panfrost_screen *screen, struct winsys_handle *wha
assert(!ret);
bo->gem_handle = gem_handle;
bo->gpu[0] = (mali_ptr) get_bo_offset.offset;
bo->gpu = (mali_ptr) get_bo_offset.offset;
// TODO map and unmap on demand?
mmap_bo.handle = gem_handle;
@ -146,17 +146,17 @@ panfrost_drm_import_bo(struct panfrost_screen *screen, struct winsys_handle *wha
assert(0);
}
bo->size[0] = lseek(whandle->handle, 0, SEEK_END);
assert(bo->size[0] > 0);
bo->cpu[0] = mmap(NULL, bo->size[0], PROT_READ | PROT_WRITE, MAP_SHARED,
bo->size = lseek(whandle->handle, 0, SEEK_END);
assert(bo->size > 0);
bo->cpu = mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
drm->fd, mmap_bo.offset);
if (bo->cpu[0] == MAP_FAILED) {
fprintf(stderr, "mmap failed: %p\n", bo->cpu[0]);
if (bo->cpu == MAP_FAILED) {
fprintf(stderr, "mmap failed: %p\n", bo->cpu);
assert(0);
}
/* Record the mmap if we're tracing */
pantrace_mmap(bo->gpu[0], bo->cpu[0], bo->size[0], NULL);
pantrace_mmap(bo->gpu, bo->cpu, bo->size, NULL);
return bo;
}
@ -196,7 +196,7 @@ panfrost_drm_free_imported_bo(struct panfrost_screen *screen, struct panfrost_bo
}
bo->gem_handle = -1;
bo->gpu[0] = (mali_ptr)NULL;
bo->gpu = (mali_ptr)NULL;
}
static int

View File

@ -81,14 +81,14 @@ panfrost_mfbd_set_cbuf(
bool flip_y)
{
struct panfrost_resource *rsrc = pan_resource(surf->texture);
int stride = rsrc->bo->stride;
int stride = rsrc->bo->slices[0].stride;
rt->format = panfrost_mfbd_format(surf);
/* Now, we set the layout specific pieces */
if (rsrc->bo->layout == PAN_LINEAR) {
mali_ptr framebuffer = rsrc->bo->gpu[0];
mali_ptr framebuffer = rsrc->bo->gpu;
if (flip_y) {
framebuffer += stride * (surf->texture->height0 - 1);
@ -145,8 +145,8 @@ panfrost_mfbd_set_zsbuf(
fb->unk3 |= MALI_MFBD_EXTRA;
fbx->flags |= MALI_EXTRA_PRESENT | MALI_EXTRA_ZS | 0x1;
fbx->ds_linear.depth = rsrc->bo->gpu[0];
fbx->ds_linear.depth_stride = rsrc->bo->stride;
fbx->ds_linear.depth = rsrc->bo->gpu;
fbx->ds_linear.depth_stride = rsrc->bo->slices[0].stride;
} else {
assert(0);
}
@ -273,7 +273,7 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y)
fb.unk3 |= MALI_MFBD_EXTRA;
fbx.flags |= MALI_EXTRA_PRESENT;
fbx.checksum_stride = rsrc->bo->checksum_stride;
fbx.checksum = rsrc->bo->gpu[0] + rsrc->bo->stride * rsrc->base.height0;
fbx.checksum = rsrc->bo->gpu + rsrc->bo->slices[0].stride * rsrc->base.height0;
}
}

View File

@ -68,7 +68,7 @@ panfrost_resource_from_handle(struct pipe_screen *pscreen,
prsc->screen = pscreen;
rsc->bo = screen->driver->import_bo(screen, whandle);
rsc->bo->stride = whandle->stride;
rsc->bo->slices[0].stride = whandle->stride;
if (screen->ro) {
rsc->scanout =
@ -99,7 +99,7 @@ panfrost_resource_get_handle(struct pipe_screen *pscreen,
return TRUE;
handle->handle = rsrc->bo->gem_handle;
handle->stride = rsrc->bo->stride;
handle->stride = rsrc->bo->slices[0].stride;
return TRUE;
} else if (handle->type == WINSYS_HANDLE_TYPE_FD) {
if (scanout) {
@ -117,7 +117,7 @@ panfrost_resource_get_handle(struct pipe_screen *pscreen,
return TRUE;
} else
return screen->driver->export_bo(screen, rsrc->bo->gem_handle, rsrc->bo->stride, handle);
return screen->driver->export_bo(screen, rsrc->bo->gem_handle, rsrc->bo->slices[0].stride, handle);
}
return FALSE;
@ -133,8 +133,12 @@ static void
panfrost_blit(struct pipe_context *pipe,
const struct pipe_blit_info *info)
{
/* STUB */
DBG("Skipping blit XXX\n");
if (util_try_blit_via_copy_region(pipe, info))
return;
/* TODO */
DBG("Unhandled blit.\n");
return;
}
@ -183,25 +187,45 @@ panfrost_surface_destroy(struct pipe_context *pipe,
free(surf);
}
/* Lay out every mip level of a resource back-to-back inside one BO.
 *
 * For each level 0..tmpl->last_level this fills bo->slices[l] with the
 * level's byte offset from the start of the BO and its row stride in
 * bytes, then stores the total (rounded up to a multiple of 4096) in
 * bo->size.
 *
 * bo->layout is read here, so it must already be set by the caller.
 * Only width0, height0, format and last_level of the template are
 * consulted; depth0 is not factored into the computed size.
 *
 * NOTE(review): nothing here bounds last_level against the length of
 * bo->slices -- presumably callers guarantee it; confirm. */
static void
panfrost_setup_slices(const struct pipe_resource *tmpl, struct panfrost_bo *bo)
{
unsigned width = tmpl->width0;
unsigned height = tmpl->height0;
unsigned bytes_per_pixel = util_format_get_blocksize(tmpl->format);
/* Running byte offset of the current level from the start of the BO */
unsigned offset = 0;
for (unsigned l = 0; l <= tmpl->last_level; ++l) {
struct panfrost_slice *slice = &bo->slices[l];
unsigned effective_width = width;
unsigned effective_height = height;
/* Tiled operates blockwise; linear is packed */
if (bo->layout == PAN_TILED) {
/* Round both dimensions up to a multiple of 16 pixels */
effective_width = ALIGN(effective_width, 16);
effective_height = ALIGN(effective_height, 16);
}
slice->offset = offset;
slice->stride = bytes_per_pixel * effective_width;
/* The next level starts right after this one's (padded) rows */
offset += slice->stride * effective_height;
/* Step to the next level's dimensions (presumably halved and
 * clamped to 1 by u_minify -- confirm against its definition) */
width = u_minify(width, 1);
height = u_minify(height, 1);
}
/* Total size of all levels, rounded up to a 4096-byte multiple */
bo->size = ALIGN(offset, 4096);
}
static struct panfrost_bo *
panfrost_create_bo(struct panfrost_screen *screen, const struct pipe_resource *template)
{
struct panfrost_bo *bo = CALLOC_STRUCT(panfrost_bo);
/* Calculate the size of the bo */
int bytes_per_pixel = util_format_get_blocksize(template->format);
int stride = ALIGN(template->width0, 16) * bytes_per_pixel;
size_t sz = stride;
if (template->height0) sz *= template->height0;
if (template->depth0) sz *= template->depth0;
/* Depth buffers require extra space for unknown reasons */
if (template->bind & PIPE_BIND_DEPTH_STENCIL)
sz = sz + sz/256;
/* Based on the usage, figure out what storing will be used. There are
* various tradeoffs:
*
@ -229,31 +253,18 @@ panfrost_create_bo(struct panfrost_screen *screen, const struct pipe_resource *t
/* Set the layout appropriately */
bo->layout = should_tile ? PAN_TILED : PAN_LINEAR;
if (bo->layout == PAN_TILED) {
/* For tiled, we don't map directly, so just malloc any old buffer */
for (int l = 0; l < (template->last_level + 1); ++l) {
bo->cpu[l] = malloc(sz);
bo->size[l] = sz;
}
} else {
/* For a linear resource, allocate a block of memory from
* kernel space */
panfrost_setup_slices(template, bo);
if (bo->layout == PAN_TILED || bo->layout == PAN_LINEAR) {
struct panfrost_memory mem;
bo->size[0] = ALIGN(sz, 4096);
screen->driver->allocate_slab(screen, &mem, bo->size[0] / 4096, true, 0, 0, 0);
screen->driver->allocate_slab(screen, &mem, bo->size / 4096, true, 0, 0, 0);
bo->cpu[0] = mem.cpu;
bo->gpu[0] = mem.gpu;
bo->cpu = mem.cpu;
bo->gpu = mem.gpu;
bo->gem_handle = mem.gem_handle;
/* TODO: Mipmap */
}
bo->stride = stride;
return bo;
}
@ -317,27 +328,18 @@ panfrost_destroy_bo(struct panfrost_screen *screen, struct panfrost_bo *pbo)
{
struct panfrost_bo *bo = (struct panfrost_bo *)pbo;
if (bo->layout == PAN_LINEAR && !bo->imported) {
/* Construct a memory object for all mip levels */
if ((bo->layout == PAN_LINEAR || bo->layout == PAN_TILED) &&
!bo->imported) {
struct panfrost_memory mem = {
.cpu = bo->cpu[0],
.gpu = bo->gpu[0],
.size = bo->size[0],
.cpu = bo->cpu,
.gpu = bo->gpu,
.size = bo->size,
.gem_handle = bo->gem_handle,
};
screen->driver->free_slab(screen, &mem);
}
if (bo->layout == PAN_TILED) {
/* Tiled has a malloc'd CPU, so just plain ol' free needed */
for (int l = 0; l < MAX_MIP_LEVELS; ++l) {
free(bo->cpu[l]);
}
}
if (bo->layout == PAN_AFBC) {
/* TODO */
DBG("--leaking afbc (%d bytes)--\n", bo->afbc_metadata_size);
@ -369,30 +371,6 @@ panfrost_resource_destroy(struct pipe_screen *screen,
FREE(rsrc);
}
static uint8_t *
panfrost_map_bo(struct panfrost_context *ctx, struct pipe_transfer *transfer)
{
struct panfrost_bo *bo = (struct panfrost_bo *)pan_resource(transfer->resource)->bo;
/* If non-zero level, it's a mipmapped resource and needs to be treated as such */
bo->is_mipmap |= transfer->level;
if (transfer->usage & PIPE_TRANSFER_MAP_DIRECTLY && bo->layout != PAN_LINEAR) {
/* We can only directly map linear resources */
return NULL;
}
if (transfer->resource->bind & PIPE_BIND_DEPTH_STENCIL) {
/* Mipmapped readpixels?! */
assert(transfer->level == 0);
/* Set the CPU mapping to that of the depth/stencil buffer in memory, untiled */
bo->cpu[transfer->level] = ctx->depth_stencil_buffer.cpu;
}
return bo->cpu[transfer->level];
}
static void *
panfrost_transfer_map(struct pipe_context *pctx,
struct pipe_resource *resource,
@ -401,21 +379,20 @@ panfrost_transfer_map(struct pipe_context *pctx,
const struct pipe_box *box,
struct pipe_transfer **out_transfer)
{
struct panfrost_context *ctx = pan_context(pctx);
int bytes_per_pixel = util_format_get_blocksize(resource->format);
struct panfrost_bo *bo = pan_resource(resource)->bo;
uint8_t *cpu;
struct pipe_transfer *transfer = CALLOC_STRUCT(pipe_transfer);
transfer->level = level;
transfer->usage = usage;
transfer->box = *box;
transfer->stride = bo->stride;
assert(!transfer->box.z);
struct panfrost_gtransfer *transfer = CALLOC_STRUCT(panfrost_gtransfer);
transfer->base.level = level;
transfer->base.usage = usage;
transfer->base.box = *box;
transfer->base.stride = bo->slices[level].stride;
transfer->base.layer_stride = bytes_per_pixel * resource->width0; /* TODO: Cubemaps */
assert(!transfer->base.box.z);
pipe_resource_reference(&transfer->resource, resource);
pipe_resource_reference(&transfer->base.resource, resource);
*out_transfer = transfer;
*out_transfer = &transfer->base;
if (resource->bind & PIPE_BIND_DISPLAY_TARGET ||
resource->bind & PIPE_BIND_SCANOUT ||
@ -427,54 +404,49 @@ panfrost_transfer_map(struct pipe_context *pctx,
panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
}
cpu = panfrost_map_bo(ctx, transfer);
if (cpu == NULL)
return NULL;
if (bo->layout != PAN_LINEAR) {
/* Non-linear resources need to be indirectly mapped */
return cpu + transfer->box.x * bytes_per_pixel + transfer->box.y * bo->stride;
if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
return NULL;
/* TODO: Reads */
transfer->map = malloc(ALIGN(box->width, 16) * ALIGN(box->height, 16) * bytes_per_pixel);
return transfer->map;
} else {
return bo->cpu
+ bo->slices[level].offset
+ transfer->base.box.y * bo->slices[level].stride
+ transfer->base.box.x * bytes_per_pixel;
}
}
static void
panfrost_tile_texture(struct panfrost_screen *screen, struct panfrost_resource *rsrc, int level)
panfrost_tile_texture(struct panfrost_screen *screen, struct panfrost_resource *rsrc, struct panfrost_gtransfer *trans)
{
struct panfrost_bo *bo = (struct panfrost_bo *)rsrc->bo;
int bytes_per_pixel = util_format_get_blocksize(rsrc->base.format);
int width = rsrc->base.width0 >> level;
int height = rsrc->base.height0 >> level;
unsigned level = trans->base.level;
/* Estimate swizzled bitmap size. Slight overestimates are fine.
* Underestimates will result in memory corruption or worse. */
assert(!trans->base.box.z);
int swizzled_sz = panfrost_swizzled_size(width, height, bytes_per_pixel);
/* Save the entry. But if there was already an entry here (from a
* previous upload of the resource), free that one so we don't leak */
if (bo->entry[level] != NULL) {
bo->entry[level]->freed = true;
pb_slab_free(&screen->slabs, &bo->entry[level]->base);
}
/* Allocate the transfer given that known size but do not copy */
struct pb_slab_entry *entry = pb_slab_alloc(&screen->slabs, swizzled_sz, HEAP_TEXTURE);
struct panfrost_memory_entry *p_entry = (struct panfrost_memory_entry *) entry;
struct panfrost_memory *backing = (struct panfrost_memory *) entry->slab;
uint8_t *swizzled = backing->cpu + p_entry->offset;
bo->entry[level] = p_entry;
bo->gpu[level] = backing->gpu + p_entry->offset;
/* Run actual texture swizzle, writing directly to the mapped
* GPU chunk we allocated */
panfrost_texture_swizzle(width, height, bytes_per_pixel, bo->stride, bo->cpu[level], swizzled);
panfrost_texture_swizzle(
trans->base.box.x,
trans->base.box.y,
trans->base.box.width,
trans->base.box.height,
util_format_get_blocksize(rsrc->base.format),
bo->slices[level].stride,
trans->map,
bo->cpu + bo->slices[level].offset);
}
static void
panfrost_unmap_bo(struct panfrost_context *ctx,
struct pipe_transfer *transfer)
{
struct panfrost_gtransfer *trans = pan_transfer(transfer);
struct panfrost_bo *bo = (struct panfrost_bo *)pan_resource(transfer->resource)->bo;
if (transfer->usage & PIPE_TRANSFER_WRITE) {
@ -487,10 +459,12 @@ panfrost_unmap_bo(struct panfrost_context *ctx,
} else if (bo->layout == PAN_TILED) {
struct pipe_context *gallium = (struct pipe_context *) ctx;
struct panfrost_screen *screen = pan_screen(gallium->screen);
panfrost_tile_texture(screen, prsrc, transfer->level);
panfrost_tile_texture(screen, prsrc, trans);
}
}
}
free(trans->map);
}
static void
@ -628,7 +602,6 @@ panfrost_resource_context_init(struct pipe_context *pctx)
pctx->surface_destroy = panfrost_surface_destroy;
pctx->resource_copy_region = util_resource_copy_region;
pctx->blit = panfrost_blit;
//pctx->generate_mipmap = panfrost_generate_mipmap;
pctx->flush_resource = panfrost_flush_resource;
pctx->invalidate_resource = panfrost_invalidate_resource;
pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;

View File

@ -39,32 +39,29 @@ enum panfrost_memory_layout {
PAN_AFBC
};
/* Placement of a single mip level within a BO's backing memory */
struct panfrost_slice {
/* Byte offset of this level from the start of the BO; added to the BO's
 * cpu / gpu base address to reach the level's data */
unsigned offset;
/* Bytes per row for this level (bytes-per-pixel times the level's
 * width, with the width padded when the layout is tiled) */
unsigned stride;
};
struct panfrost_bo {
/* Address to the BO in question */
struct panfrost_slice slices[MAX_MIP_LEVELS];
uint8_t *cpu[MAX_MIP_LEVELS];
/* Mapping for the entire object (all levels) */
uint8_t *cpu;
/* Not necessarily a GPU mapping of cpu! In case of texture tiling, gpu
* points to the GPU-side, tiled texture, while cpu points to the
* CPU-side, untiled texture from mesa */
/* GPU address for the object */
mali_ptr gpu;
mali_ptr gpu[MAX_MIP_LEVELS];
/* Memory entry corresponding to gpu above */
struct panfrost_memory_entry *entry[MAX_MIP_LEVELS];
/* Size of the entire tree */
size_t size;
/* Set if this bo was imported rather than allocated */
bool imported;
/* Number of bytes of allocation */
size_t size[MAX_MIP_LEVELS];
/* Internal layout (tiled?) */
enum panfrost_memory_layout layout;
/* Is something other than level 0 ever written? */
bool is_mipmap;
/* If AFBC is enabled for this resource, we lug around an AFBC
* metadata buffer as well. The actual AFBC resource is also in
* afbc_slab (only defined for AFBC) at position afbc_main_offset
@ -81,7 +78,6 @@ struct panfrost_bo {
int checksum_stride;
int gem_handle;
unsigned int stride;
};
struct panfrost_resource {
@ -99,6 +95,17 @@ pan_resource(struct pipe_resource *p)
return (struct panfrost_resource *)p;
}
/* Driver-side transfer object wrapping the generic pipe_transfer.
 * base must remain the first member: pan_transfer() converts a
 * pipe_transfer pointer back to this type with a plain cast. */
struct panfrost_gtransfer {
struct pipe_transfer base;
/* Staging buffer handed out for mappings of non-linear resources; it is
 * tiled into the BO and freed at unmap. Left NULL (from the zeroing
 * allocation) when the resource is mapped directly. */
void *map;
};
/* Downcast a generic pipe_transfer to the driver's panfrost_gtransfer.
 * This is a plain pointer cast, so p must point at the base member of a
 * panfrost_gtransfer. */
static inline struct panfrost_gtransfer *
pan_transfer(struct pipe_transfer *p)
{
return (struct panfrost_gtransfer *)p;
}
void panfrost_resource_screen_init(struct panfrost_screen *screen);
void panfrost_resource_context_init(struct pipe_context *pctx);

View File

@ -92,12 +92,12 @@ panfrost_sfbd_set_cbuf(
{
struct panfrost_resource *rsrc = pan_resource(surf->texture);
signed stride = rsrc->bo->stride;
signed stride = rsrc->bo->slices[0].stride;
fb->format = panfrost_sfbd_format(surf);
if (rsrc->bo->layout == PAN_LINEAR) {
mali_ptr framebuffer = rsrc->bo->gpu[0];
mali_ptr framebuffer = rsrc->bo->gpu;
/* The default is upside down from OpenGL's perspective. */
if (flip_y) {

View File

@ -147,7 +147,9 @@ swizzle_bpp4_align16(int width, int height, int source_stride, int block_pitch,
}
void
panfrost_texture_swizzle(int width, int height, int bytes_per_pixel, int source_stride,
panfrost_texture_swizzle(unsigned off_x,
unsigned off_y,
int width, int height, int bytes_per_pixel, int source_stride,
const uint8_t *pixels,
uint8_t *ldest)
{
@ -155,25 +157,27 @@ panfrost_texture_swizzle(int width, int height, int bytes_per_pixel, int source_
int block_pitch = ALIGN(width, 16) >> 4;
/* Use fast path if available */
if (bytes_per_pixel == 4 /* && (ALIGN(width, 16) == width) */) {
swizzle_bpp4_align16(width, height, source_stride >> 2, (block_pitch * 256 >> 4), (const uint32_t *) pixels, (uint32_t *) ldest);
return;
} else if (bytes_per_pixel == 1 /* && (ALIGN(width, 16) == width) */) {
swizzle_bpp1_align16(width, height, source_stride, (block_pitch * 256 >> 4), pixels, (uint8_t *) ldest);
return;
if (!(off_x || off_y)) {
if (bytes_per_pixel == 4 /* && (ALIGN(width, 16) == width) */) {
swizzle_bpp4_align16(width, height, source_stride >> 2, (block_pitch * 256 >> 4), (const uint32_t *) pixels, (uint32_t *) ldest);
return;
} else if (bytes_per_pixel == 1 /* && (ALIGN(width, 16) == width) */) {
swizzle_bpp1_align16(width, height, source_stride, (block_pitch * 256 >> 4), pixels, (uint8_t *) ldest);
return;
}
}
/* Otherwise, default back on generic path */
for (int y = 0; y < height; ++y) {
int block_y = y >> 4;
int rem_y = y & 0x0F;
int block_y = (y + off_y) >> 4;
int rem_y = (y + off_y) & 0x0F;
int block_start_s = block_y * block_pitch * 256;
int source_start = y * source_stride;
for (int x = 0; x < width; ++x) {
int block_x_s = (x >> 4) * 256;
int rem_x = x & 0x0F;
int block_x_s = ((x + off_x) >> 4) * 256;
int rem_x = (x + off_x) & 0x0F;
int index = space_filler[rem_y][rem_x];
const uint8_t *source = &pixels[source_start + bytes_per_pixel * x];
@ -184,14 +188,3 @@ panfrost_texture_swizzle(int width, int height, int bytes_per_pixel, int source_
}
}
}
unsigned
panfrost_swizzled_size(int width, int height, int bytes_per_pixel)
{
/* Calculate maximum size, overestimating a bit */
int block_pitch = ALIGN(width, 16) >> 4;
unsigned sz = bytes_per_pixel * 256 * ((height >> 4) + 1) * block_pitch;
return sz;
}

View File

@ -31,11 +31,9 @@ void
panfrost_generate_space_filler_indices(void);
void
panfrost_texture_swizzle(int width, int height, int bytes_per_pixel, int source_stride,
panfrost_texture_swizzle(unsigned off_x, unsigned off_y,
int width, int height, int bytes_per_pixel, int source_stride,
const uint8_t *pixels,
uint8_t *ldest);
unsigned
panfrost_swizzled_size(int width, int height, int bytes_per_pixel);
#endif