panfrost/mfbd: Respect per-job depth write flag

While a depth buffer may be supplied, it only needs to be written to if
the depth writemask is set for any draw AND if the depth buffer is not
immediately invalidated (as is the case for scanout). This refactors
panfrost_job to provide a depth write requirement, which is now
implemented for MFBD depth buffers.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
Alyssa Rosenzweig 2019-03-12 23:16:37 +00:00
parent 9bf6024c6b
commit 8c26890ac2
4 changed files with 42 additions and 20 deletions

View File

@ -50,19 +50,6 @@ extern const char *pan_counters_base;
/* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */
//#define DRY_RUN
/* TODO: Sample size, etc */
/* Marks the job for the currently bound FBO as using MSAA (when requested)
 * and mirrors the setting into the fragment shader core's MSAA bits. */
static void
panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled)
{
        struct panfrost_job *batch = panfrost_get_job_for_fbo(ctx);

        /* Sticky flag: a later call with enabled == false never clears it */
        if (enabled) {
                batch->msaa = true;
        }

        /* HAS_MSAA and NO_MSAA are driven as exact complements of each other */
        SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, enabled);
        SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !enabled);
}
/* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically
* independent between color buffers and depth/stencil). To enable, we allocate
* the AFBC metadata buffer and mark that it is enabled. We do -not- actually
@ -789,15 +776,30 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx)
void
panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
{
struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
if (with_vertex_data) {
panfrost_emit_vertex_data(ctx);
}
bool msaa = ctx->rasterizer->base.multisample;
if (ctx->dirty & PAN_DIRTY_RASTERIZER) {
ctx->payload_tiler.gl_enables = ctx->rasterizer->tiler_gl_enables;
panfrost_set_framebuffer_msaa(ctx, ctx->rasterizer->base.multisample);
/* TODO: Sample size */
SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa);
SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa);
}
/* Enable job requirements at draw-time */
if (msaa)
job->requirements |= PAN_REQ_MSAA;
if (ctx->depth_stencil->depth.writemask)
job->requirements |= PAN_REQ_DEPTH_WRITE;
if (ctx->occlusion_query) {
ctx->payload_tiler.gl_enables |= MALI_OCCLUSION_QUERY | MALI_OCCLUSION_PRECISE;
ctx->payload_tiler.postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu;

View File

@ -33,6 +33,9 @@ struct panfrost_job_key {
struct pipe_surface *zsbuf;
};
/* Per-job requirement flags. These are OR'd into the `requirements` bitmask
 * of struct panfrost_job at draw time and tested when the fragment job's
 * framebuffer descriptor is built. */

/* Set when a draw uses a rasterizer state with multisampling enabled */
#define PAN_REQ_MSAA (1 << 0)
/* Set when a draw has the depth writemask enabled */
#define PAN_REQ_DEPTH_WRITE (1 << 1)
/* A panfrost_job corresponds to a bound FBO we're rendering to,
* collecting over multiple draws. */
@ -48,8 +51,10 @@ struct panfrost_job {
float clear_depth;
unsigned clear_stencil;
/* Whether this job uses MSAA */
bool msaa;
/* Whether this job uses the corresponding requirement (PAN_REQ_*
* bitmask) */
unsigned requirements;
};
/* Functions for managing the above */

View File

@ -143,8 +143,6 @@ panfrost_mfbd_set_zsbuf(
fbx->ds_afbc.zero1 = 0x10009;
fbx->ds_afbc.padding = 0x1000;
fb->unk3 |= MALI_MFBD_DEPTH_WRITE;
} else if (rsrc->bo->layout == PAN_LINEAR) {
fb->unk3 |= MALI_MFBD_EXTRA;
fbx->flags |= MALI_EXTRA_PRESENT | MALI_EXTRA_ZS | 0x1;
@ -246,7 +244,21 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y)
rts[0].framebuffer_stride = 0;
}
if (job->msaa) {
/* When scanning out, the depth buffer is immediately invalidated, so
* we don't need to waste bandwidth writing it out. This can improve
* performance substantially (Z32_UNORM 1080p @ 60fps is 475 MB/s of
* memory bandwidth!).
*
* The exception is ReadPixels, but this is not supported on GLES so we
* can safely ignore it. */
if (panfrost_is_scanout(ctx)) {
job->requirements &= ~PAN_REQ_DEPTH_WRITE;
}
/* Actualize the requirements */
if (job->requirements & PAN_REQ_MSAA) {
rts[0].format.flags |= MALI_MFBD_FORMAT_MSAA;
/* XXX */
@ -254,6 +266,9 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y)
fb.rt_count_2 = 4;
}
if (job->requirements & PAN_REQ_DEPTH_WRITE)
fb.unk3 |= MALI_MFBD_DEPTH_WRITE;
if (ctx->pipe_framebuffer.nr_cbufs == 1) {
struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture;

View File

@ -132,7 +132,7 @@ panfrost_sfbd_fragment(struct panfrost_context *ctx, bool flip_y)
/* TODO */
}
if (job->msaa)
if (job->requirements & PAN_REQ_MSAA)
fb.format |= MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B;
return panfrost_upload_transient(ctx, &fb, sizeof(fb)) | MALI_SFBD;