From 165ca6b5ad4fbd20798b6bd8120504761865436c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 23 Jul 2014 11:21:04 -0700 Subject: [PATCH] vc4: Track clears versus uncleared draws, and the clear color. This is a step toward queueing more than one draw per frame. Fixes piglit attribute0 test, since we get a working clear color now. --- src/gallium/drivers/vc4/vc4_context.c | 70 +++++++++++++++++++-------- src/gallium/drivers/vc4/vc4_context.h | 15 ++++++ src/gallium/drivers/vc4/vc4_draw.c | 66 ++++++++++++++++++++----- 3 files changed, 117 insertions(+), 34 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index e9cba69686e..759fc0280b4 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -89,14 +89,22 @@ vc4_setup_rcl(struct vc4_context *vc4) { struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]); struct vc4_resource *ctex = vc4_resource(csurf->base.texture); + uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared; uint32_t width = vc4->framebuffer.width; uint32_t height = vc4->framebuffer.height; uint32_t xtiles = align(width, 64) / 64; uint32_t ytiles = align(height, 64) / 64; +#if 0 + fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n", + vc4->resolve, + vc4->cleared, + resolve_uncleared); +#endif + cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS); - cl_u32(&vc4->rcl, 0xff000000); // Opaque Black - cl_u32(&vc4->rcl, 0xff000000); // 32 bit clear colours need to be repeated twice + cl_u32(&vc4->rcl, vc4->clear_color[0]); + cl_u32(&vc4->rcl, vc4->clear_color[1]); cl_u32(&vc4->rcl, 0); cl_u8(&vc4->rcl, 0); @@ -109,44 +117,60 @@ vc4_setup_rcl(struct vc4_context *vc4) VC4_RENDER_CONFIG_FORMAT_RGBA8888)); cl_u8(&vc4->rcl, 0); - // Do a store of the first tile to force the tile buffer to be cleared - /* XXX: I think these two packets may be unnecessary. 
*/ - if (0) { + /* The tile buffer normally gets cleared when the previous tile is + * stored. If the clear values changed between frames, then the tile + * buffer has stale clear values in it, so we have to do a store in + * None mode (no writes) so that we trigger the tile buffer clear. + */ + if (vc4->cleared & PIPE_CLEAR_COLOR0) { cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES); cl_u8(&vc4->rcl, 0); cl_u8(&vc4->rcl, 0); cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); - cl_u16(&vc4->rcl, 0); // Store nothing (just clear) - cl_u32(&vc4->rcl, 0); // no address is needed + cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE); + cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */ } for (int x = 0; x < xtiles; x++) { for (int y = 0; y < ytiles; y++) { + bool end_of_frame = (x == xtiles - 1 && + y == ytiles - 1); + + /* Note that the load doesn't actually occur until the + * tile coords packet is processed. + */ + if (resolve_uncleared & PIPE_CLEAR_COLOR) { + cl_start_reloc(&vc4->rcl, 1); + cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); + cl_u8(&vc4->rcl, + VC4_LOADSTORE_TILE_BUFFER_COLOR | + VC4_LOADSTORE_TILE_BUFFER_FORMAT_RASTER); + cl_u8(&vc4->rcl, + VC4_LOADSTORE_TILE_BUFFER_RGBA8888); + cl_reloc(vc4, &vc4->rcl, ctex->bo, + csurf->offset); + } + cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES); cl_u8(&vc4->rcl, x); cl_u8(&vc4->rcl, y); - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); - cl_u8(&vc4->rcl, - VC4_LOADSTORE_TILE_BUFFER_COLOR | - VC4_LOADSTORE_TILE_BUFFER_FORMAT_RASTER); - cl_u8(&vc4->rcl, - VC4_LOADSTORE_TILE_BUFFER_RGBA8888); - cl_reloc(vc4, &vc4->rcl, ctex->bo, csurf->offset); - cl_start_reloc(&vc4->rcl, 1); cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST); cl_reloc(vc4, &vc4->rcl, vc4->tile_alloc, (y * xtiles + x) * 32); - if (x == xtiles - 1 && y == ytiles - 1) { - cl_u8(&vc4->rcl, - VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF); + if (vc4->resolve & PIPE_CLEAR_COLOR0) { + if (end_of_frame) { + 
cl_u8(&vc4->rcl, + VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF); + } else { + cl_u8(&vc4->rcl, + VC4_PACKET_STORE_MS_TILE_BUFFER); + } } else { - cl_u8(&vc4->rcl, - VC4_PACKET_STORE_MS_TILE_BUFFER); + assert(!"unfinished: Need to end the frame\n"); } } } @@ -168,6 +192,7 @@ vc4_flush(struct pipe_context *pctx) struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]); struct vc4_resource *ctex = vc4_resource(csurf->base.texture); + struct drm_vc4_submit_cl submit; memset(&submit, 0, sizeof(submit)); @@ -207,7 +232,10 @@ vc4_flush(struct pipe_context *pctx) vc4->shader_rec_count = 0; vc4->needs_flush = false; + vc4->draw_call_queued = false; vc4->dirty = ~0; + vc4->resolve = 0; + vc4->cleared = 0; dump_fbo(vc4, ctex->bo); } diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 010727ff4de..55746ea3b71 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -136,6 +136,15 @@ struct vc4_context { /** bitfield of VC4_DIRTY_* */ uint32_t dirty; + /* Bitmask of PIPE_CLEAR_* of buffers that were cleared before the + * first rendering. + */ + uint32_t cleared; + /* Bitmask of PIPE_CLEAR_* of buffers that have been rendered to + * (either clears or draws). + */ + uint32_t resolve; + uint32_t clear_color[2]; /** * Set if some drawing (triangles, blits, or just a glClear()) has @@ -144,6 +153,12 @@ struct vc4_context { */ bool needs_flush; + /** + * Set when needs_flush, and the queued rendering is not just composed + * of full-buffer clears. 
+ */ + bool draw_call_queued; + struct primconvert_context *primconvert; struct util_hash_table *fs_cache, *vs_cache; diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 5abe263b09c..af595466379 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -25,22 +25,20 @@ #include #include "util/u_format.h" +#include "util/u_pack_color.h" #include "indices/u_primconvert.h" #include "vc4_context.h" #include "vc4_resource.h" +/** + * Does the initial binning command list setup for drawing to a given FBO. + */ static void -vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) +vc4_start_draw(struct vc4_context *vc4) { - struct vc4_context *vc4 = vc4_context(pctx); - - if (info->mode >= PIPE_PRIM_QUADS) { - util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf); - util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base); - util_primconvert_draw_vbo(vc4->primconvert, info); + if (vc4->needs_flush) return; - } uint32_t width = vc4->framebuffer.width; uint32_t height = vc4->framebuffer.height; @@ -60,10 +58,6 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) "tile_state"); } - vc4_update_compiled_shaders(vc4); - - vc4->needs_flush = true; - // Tile state data is 48 bytes per tile, I think it can be thrown away // as soon as binning is finished. 
cl_start_reloc(&vc4->bcl, 2); @@ -80,6 +74,25 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) cl_u8(&vc4->bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT); cl_u8(&vc4->bcl, 0x12); // 16 bit triangle + vc4->needs_flush = true; + vc4->draw_call_queued = true; +} + +static void +vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) +{ + struct vc4_context *vc4 = vc4_context(pctx); + + if (info->mode >= PIPE_PRIM_QUADS) { + util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf); + util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base); + util_primconvert_draw_vbo(vc4->primconvert, info); + return; + } + + vc4_start_draw(vc4); + vc4_update_compiled_shaders(vc4); + vc4_emit_state(pctx); /* the actual draw call. */ @@ -168,19 +181,46 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) cl_u8(&vc4->shader_rec, i * 16); /* CS VPM offset */ } + if (vc4->zsa && vc4->zsa->depth.enabled) { + vc4->resolve |= PIPE_CLEAR_DEPTH; + } + vc4->resolve |= PIPE_CLEAR_COLOR0; vc4->shader_rec_count++; vc4_flush(pctx); } +static uint32_t +pack_rgba(enum pipe_format format, const float *rgba) +{ + union util_color uc; + util_pack_color(rgba, format, &uc); + return uc.ui[0]; +} + static void vc4_clear(struct pipe_context *pctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil) { struct vc4_context *vc4 = vc4_context(pctx); - vc4->needs_flush = true; + /* We can't flag new buffers for clearing once we've queued draws. We + * could avoid this by using the 3d engine to clear. + */ + if (vc4->draw_call_queued) + vc4_flush(pctx); + + if (buffers & PIPE_CLEAR_COLOR0) { + vc4->clear_color[0] = vc4->clear_color[1] = + pack_rgba(vc4->framebuffer.cbufs[0]->format, + color->f); + } + + vc4->cleared |= buffers; + vc4->resolve |= buffers; + + vc4_start_draw(vc4); } static void