From 3e6c6bb0af971a7bc9a24f492b3dd8b0b26ffa68 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 24 Jun 2019 07:08:52 -0700 Subject: [PATCH] panfrost: Merge checksum buffer with main BO This is similar to the AFBC merge; now all (non-imported) buffers use a common backing buffer. Reenables checksumming, eliminating a performance regression. Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_context.c | 16 ------ src/gallium/drivers/panfrost/pan_drm.c | 3 -- src/gallium/drivers/panfrost/pan_mfbd.c | 17 ++++-- src/gallium/drivers/panfrost/pan_resource.c | 60 +++++++++++++++------ src/gallium/drivers/panfrost/pan_resource.h | 17 +++--- 5 files changed, 65 insertions(+), 48 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 0160452d208..fcf5e730c2b 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -75,22 +75,6 @@ panfrost_job_type_for_pipe(enum pipe_shader_type type) } } -static void -panfrost_enable_checksum(struct panfrost_context *ctx, struct panfrost_resource *rsrc) -{ - struct pipe_context *gallium = (struct pipe_context *) ctx; - struct panfrost_screen *screen = pan_screen(gallium->screen); - int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; - int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; - - /* 8 byte checksum per tile */ - rsrc->bo->checksum_stride = tile_w * 8; - int pages = (((rsrc->bo->checksum_stride * tile_h) + 4095) / 4096); - panfrost_drm_allocate_slab(screen, &rsrc->bo->checksum_slab, pages, false, 0, 0, 0); - - rsrc->bo->has_checksum = true; -} - /* Framebuffer descriptor */ static void diff --git a/src/gallium/drivers/panfrost/pan_drm.c b/src/gallium/drivers/panfrost/pan_drm.c index 2ab43cc1e4c..4f354190455 100644 --- a/src/gallium/drivers/panfrost/pan_drm.c +++ b/src/gallium/drivers/panfrost/pan_drm.c @@ -214,9 +214,6 @@ 
panfrost_drm_submit_job(struct panfrost_context *ctx, u64 job_desc, int reqs, st struct panfrost_resource *res = pan_resource(surf->texture); assert(res->bo->gem_handle > 0); bo_handles[submit.bo_handle_count++] = res->bo->gem_handle; - - if (res->bo->checksum_slab.gem_handle) - bo_handles[submit.bo_handle_count++] = res->bo->checksum_slab.gem_handle; } /* TODO: Add here the transient pools */ diff --git a/src/gallium/drivers/panfrost/pan_mfbd.c b/src/gallium/drivers/panfrost/pan_mfbd.c index af59497fa78..4d5fa4ad9ac 100644 --- a/src/gallium/drivers/panfrost/pan_mfbd.c +++ b/src/gallium/drivers/panfrost/pan_mfbd.c @@ -289,14 +289,21 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool has_draws) if (job->requirements & PAN_REQ_DEPTH_WRITE) fb.mfbd_flags |= MALI_MFBD_DEPTH_WRITE; - if (ctx->pipe_framebuffer.nr_cbufs == 1) { - struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture; + /* Checksumming only works with a single render target */ + + if (ctx->pipe_framebuffer.nr_cbufs == 1) { + struct pipe_surface *surf = ctx->pipe_framebuffer.cbufs[0]; + struct panfrost_resource *rsrc = pan_resource(surf->texture); + struct panfrost_bo *bo = rsrc->bo; + + if (bo->checksummed) { + unsigned level = surf->u.tex.level; + struct panfrost_slice *slice = &bo->slices[level]; - if (rsrc->bo->has_checksum) { fb.mfbd_flags |= MALI_MFBD_EXTRA; fbx.flags |= MALI_EXTRA_PRESENT; - fbx.checksum_stride = rsrc->bo->checksum_stride; - fbx.checksum = rsrc->bo->gpu + rsrc->bo->slices[0].stride * rsrc->base.height0; + fbx.checksum_stride = slice->checksum_stride; + fbx.checksum = bo->gpu + slice->checksum_offset; } } diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c index beb5f72d2d8..89403ab183b 100644 --- a/src/gallium/drivers/panfrost/pan_resource.c +++ b/src/gallium/drivers/panfrost/pan_resource.c @@ -180,6 +180,31 @@ panfrost_surface_destroy(struct pipe_context *pipe, 
ralloc_free(surf); } +/* Computes sizes for checksumming, which is 8 bytes per 16x16 tile */ + +#define CHECKSUM_TILE_WIDTH 16 +#define CHECKSUM_TILE_HEIGHT 16 +#define CHECKSUM_BYTES_PER_TILE 8 + +static unsigned +panfrost_compute_checksum_sizes( + struct panfrost_slice *slice, + unsigned width, + unsigned height) +{ + unsigned aligned_width = ALIGN(width, CHECKSUM_TILE_WIDTH); + unsigned aligned_height = ALIGN(height, CHECKSUM_TILE_HEIGHT); + + unsigned tile_count_x = aligned_width / CHECKSUM_TILE_WIDTH; + unsigned tile_count_y = aligned_height / CHECKSUM_TILE_HEIGHT; + + slice->checksum_stride = tile_count_x * CHECKSUM_BYTES_PER_TILE; + + return slice->checksum_stride * tile_count_y; +} + +/* Setup the mip tree given a particular layout, possibly with checksumming */ + static void panfrost_setup_slices(const struct pipe_resource *tmpl, struct panfrost_bo *bo) { @@ -254,6 +279,16 @@ panfrost_setup_slices(const struct pipe_resource *tmpl, struct panfrost_bo *bo) offset += slice_full_size; + /* Add a checksum region if necessary */ + if (bo->checksummed) { + slice->checksum_offset = offset; + + unsigned size = panfrost_compute_checksum_sizes( + slice, width, height); + + offset += size; + } + width = u_minify(width, 1); height = u_minify(height, 1); depth = u_minify(depth, 1); @@ -307,6 +342,12 @@ panfrost_create_bo(struct panfrost_screen *screen, const struct pipe_resource *t /* Depth/stencil can't be tiled, only linear or AFBC */ should_tile &= !(template->bind & PIPE_BIND_DEPTH_STENCIL); + /* FBOs we would like to checksum, if at all possible */ + bool can_checksum = !(template->bind & (PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)); + bool should_checksum = template->bind & PIPE_BIND_RENDER_TARGET; + + bo->checksummed = can_checksum && should_checksum; + /* Set the layout appropriately */ bo->layout = should_tile ? 
PAN_TILED : PAN_LINEAR; @@ -385,7 +426,9 @@ panfrost_resource_create(struct pipe_screen *screen, static void panfrost_destroy_bo(struct panfrost_screen *screen, struct panfrost_bo *bo) { - if (!bo->imported) { + if (bo->imported) { + panfrost_drm_free_imported_bo(screen, bo); + } else { struct panfrost_memory mem = { .cpu = bo->cpu, .gpu = bo->gpu, @@ -396,21 +439,6 @@ panfrost_destroy_bo(struct panfrost_screen *screen, struct panfrost_bo *bo) panfrost_drm_free_slab(screen, &mem); } - if (bo->has_checksum) { - struct panfrost_memory mem = { - .cpu = bo->checksum_slab.cpu, - .gpu = bo->checksum_slab.gpu, - .size = bo->checksum_slab.size, - .gem_handle = bo->checksum_slab.gem_handle, - }; - - panfrost_drm_free_slab(screen, &mem); - } - - if (bo->imported) { - panfrost_drm_free_imported_bo(screen, bo); - } - ralloc_free(bo); } diff --git a/src/gallium/drivers/panfrost/pan_resource.h b/src/gallium/drivers/panfrost/pan_resource.h index 220492039a5..89a4396c093 100644 --- a/src/gallium/drivers/panfrost/pan_resource.h +++ b/src/gallium/drivers/panfrost/pan_resource.h @@ -44,10 +44,15 @@ struct panfrost_slice { unsigned offset; unsigned stride; - /* If there is a header preceding each slice, how big is that header? - * Used for AFBC */ + /* If there is a header preceding each slice, how big is + * that header? Used for AFBC */ unsigned header_size; + /* If checksumming is enabled following the slice, what + * is its offset/stride? */ + unsigned checksum_offset; + unsigned checksum_stride; + /* Has anything been written to this slice? */ bool initialized; }; @@ -76,12 +81,8 @@ struct panfrost_bo { /* Internal layout (tiled?) */ enum panfrost_memory_layout layout; - /* If transaciton elimination is enabled, we have a dedicated - * buffer for that as well. */ - - bool has_checksum; - struct panfrost_memory checksum_slab; - int checksum_stride; + /* Is transaction elimination enabled? */ + bool checksummed; int gem_handle; };