freedreno/a3xx: support for hw binning pass

The binning pass sorts vertices into which bins/tiles they apply to.
The visibility information generated during the binning pass can be
used to speed up the rendering pass by filtering out vertices which
do not apply to the current tile.  See:

 https://github.com/freedreno/freedreno/wiki/Adreno-tiling#optimized-approach

This brings a significant fps boost.  A rough assortment of tests
(supertuxkart, etracer, tremulous, glmark2 'build' test, etc) seems
to yield a ~35-45% fps improvement.

For now, to be conservative, the binning pass is not enabled yet by
default.  To enable it use:

  FD_MESA_DEBUG=binning

So far I haven't found anything that breaks with binning enabled,
but I'd like a bit more testing before I enable it as default.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
Rob Clark 2014-01-07 10:55:07 -05:00
parent bfb44c24bc
commit c0766528ba
16 changed files with 710 additions and 162 deletions

View File

@ -32,7 +32,7 @@ LIBDRM_RADEON_REQUIRED=2.4.50
LIBDRM_INTEL_REQUIRED=2.4.49
LIBDRM_NVVIEUX_REQUIRED=2.4.33
LIBDRM_NOUVEAU_REQUIRED="2.4.33 libdrm >= 2.4.41"
LIBDRM_FREEDRENO_REQUIRED=2.4.39
LIBDRM_FREEDRENO_REQUIRED=2.4.51
DRI2PROTO_REQUIRED=2.6
DRI3PROTO_REQUIRED=1.0
PRESENTPROTO_REQUIRED=1.0

View File

@ -108,7 +108,7 @@ fd2_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
OUT_RING(ring, info->max_index); /* VGT_MAX_VTX_INDX */
OUT_RING(ring, info->min_index); /* VGT_MIN_VTX_INDX */
fd_draw_emit(ctx, info);
fd_draw_emit(ctx, ring, IGNORE_VISIBILITY, info);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
@ -269,8 +269,8 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 3,
INDEX_SIZE_IGN, 0, 0, NULL);
fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 3, INDEX_SIZE_IGN, 0, 0, NULL);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));

View File

@ -90,8 +90,8 @@ emit_gmem2mem_surf(struct fd_context *ctx, uint32_t base,
OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 3,
INDEX_SIZE_IGN, 0, 0, NULL);
fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 3, INDEX_SIZE_IGN, 0, 0, NULL);
}
static void
@ -212,8 +212,8 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 3,
INDEX_SIZE_IGN, 0, 0, NULL);
fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 3, INDEX_SIZE_IGN, 0, 0, NULL);
}
static void

View File

@ -43,7 +43,7 @@
static void
emit_vertexbufs(struct fd_context *ctx)
emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
struct fd_vertex_stateobj *vtx = ctx->vtx;
struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf;
@ -63,19 +63,17 @@ emit_vertexbufs(struct fd_context *ctx)
bufs[i].format = elem->src_format;
}
fd3_emit_vertex_bufs(ctx->ring, &ctx->prog, bufs, vtx->num_elements);
fd3_emit_vertex_bufs(ring, &ctx->prog, bufs, vtx->num_elements);
}
static void
fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
struct fd_ringbuffer *ring, unsigned dirty, bool binning)
{
struct fd_ringbuffer *ring = ctx->ring;
unsigned dirty = ctx->dirty;
fd3_emit_state(ctx, dirty);
fd3_emit_state(ctx, ring, dirty, binning);
if (dirty & FD_DIRTY_VTXBUF)
emit_vertexbufs(ctx);
emit_vertexbufs(ctx, ring);
OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
@ -90,7 +88,59 @@ fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
fd_draw_emit(ctx, info);
fd_draw_emit(ctx, ring, binning ? IGNORE_VISIBILITY : USE_VISIBILITY, info);
}
static void
fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
{
unsigned dirty = ctx->dirty;
draw_impl(ctx, info, ctx->binning_ring,
dirty & ~(FD_DIRTY_BLEND), true);
draw_impl(ctx, info, ctx->ring, dirty, false);
}
/* binning pass cmds for a clear:
* NOTE: newer blob drivers don't use binning for clear, which is probably
* preferable since it is low vtx count. However that doesn't seem to
* actually work for me. Not sure if it is depending on support for
* clear pass (rather than using solid-fill shader), or something else
* that newer blob is doing differently. Once that is figured out, we
* can remove fd3_clear_binning().
*/
static void
fd3_clear_binning(struct fd_context *ctx, unsigned dirty)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->binning_ring;
fd3_emit_state(ctx, ring, dirty & (FD_DIRTY_VIEWPORT |
FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR), true);
fd3_program_emit(ring, &ctx->solid_prog, true);
fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
{ .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
}, 1);
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
OUT_RING(ring, 0); /* VFD_INDEX_MIN */
OUT_RING(ring, 2); /* VFD_INDEX_MAX */
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, PERFCOUNTER_STOP);
fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
}
static void
@ -99,11 +149,14 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
unsigned dirty = ctx->dirty;
unsigned ce, i;
fd3_clear_binning(ctx, dirty);
/* emit generic state now: */
fd3_emit_state(ctx, ctx->dirty & (FD_DIRTY_VIEWPORT |
FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR));
fd3_emit_state(ctx, ring, dirty & (FD_DIRTY_VIEWPORT |
FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR), false);
OUT_PKT0(ring, REG_A3XX_RB_BLEND_ALPHA, 1);
OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) |
@ -192,7 +245,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
fd3_program_emit(ring, &ctx->solid_prog);
fd3_program_emit(ring, &ctx->solid_prog, false);
fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
{ .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
@ -216,8 +269,8 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, PERFCOUNTER_STOP);
fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 2,
INDEX_SIZE_IGN, 0, 0, NULL);
fd_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
}
void

View File

@ -337,10 +337,9 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
}
void
fd3_emit_state(struct fd_context *ctx, uint32_t dirty)
fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
uint32_t dirty, bool binning)
{
struct fd_ringbuffer *ring = ctx->ring;
emit_marker(ring, 5);
if (dirty & FD_DIRTY_SAMPLE_MASK) {
@ -354,7 +353,8 @@ fd3_emit_state(struct fd_context *ctx, uint32_t dirty)
struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
struct pipe_stencil_ref *sr = &ctx->stencil_ref;
fd3_emit_rbrc_draw_state(ctx, ring, zsa->rb_render_control);
if (!binning)
fd3_emit_rbrc_draw_state(ctx, ring, zsa->rb_render_control);
OUT_PKT0(ring, REG_A3XX_RB_ALPHA_REF, 1);
OUT_RING(ring, zsa->rb_alpha_ref);
@ -432,7 +432,10 @@ fd3_emit_state(struct fd_context *ctx, uint32_t dirty)
}
if (dirty & FD_DIRTY_PROG)
fd3_program_emit(ring, &ctx->prog);
fd3_program_emit(ring, &ctx->prog, binning);
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, HLSQ_FLUSH);
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
struct fd_program_stateobj *prog = &ctx->prog;
@ -566,11 +569,11 @@ fd3_emit_restore(struct fd_context *ctx)
OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) |
A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0));
OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 1);
OUT_RING(ring, 0x00000001); /* UCHE_CACHE_MODE_CONTROL_REG */
OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
@ -604,6 +607,9 @@ fd3_emit_restore(struct fd_context *ctx)
OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].W */
}
OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
OUT_RING(ring, 0x00000000);
emit_cache_flush(ring);
fd_rmw_wfi(ctx, ring);
}

View File

@ -58,7 +58,8 @@ struct fd3_vertex_buf {
void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
struct fd_program_stateobj *prog,
struct fd3_vertex_buf *vbufs, uint32_t n);
void fd3_emit_state(struct fd_context *ctx, uint32_t dirty);
void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
uint32_t dirty, bool binning);
void fd3_emit_restore(struct fd_context *ctx);

View File

@ -106,6 +106,159 @@ depth_base(struct fd_gmem_stateobj *gmem)
return align(gmem->bin_w * gmem->bin_h, 0x4000);
}
static bool
use_hw_binning(struct fd_context *ctx)
{
struct fd_gmem_stateobj *gmem = &ctx->gmem;
return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
}
/* workaround for (hlsq?) lockup with hw binning on a3xx patchlevel 0 */
static void update_vsc_pipe(struct fd_context *ctx);
static void
emit_binning_workaround(struct fd_context *ctx)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct fd_ringbuffer *ring = ctx->ring;
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) |
A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
A3XX_RB_COPY_CONTROL_MODE(0) |
A3XX_RB_COPY_CONTROL_GMEM_BASE(0));
OUT_RELOC(ring, fd_resource(fd3_ctx->solid_vbuf)->bo, 0x20, 0, -1); /* RB_COPY_DEST_BASE */
OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(128));
OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
A3XX_RB_COPY_DEST_INFO_FORMAT(RB_R8G8B8A8_UNORM) |
A3XX_RB_COPY_DEST_INFO_SWAP(WZYX) |
A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE));
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
fd3_program_emit(ring, &ctx->solid_prog, false);
fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
{ .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
}, 1);
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);
OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
A3XX_HLSQ_CONTROL_0_REG_RESERVED2 |
A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE);
OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
OUT_RING(ring, 0); /* HLSQ_CONTROL_3_REG */
OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1);
OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0x20) |
A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0x20));
OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff));
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0.0));
OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
OUT_RING(ring, 0); /* VFD_INDEX_MIN */
OUT_RING(ring, 2); /* VFD_INDEX_MAX */
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(1));
OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(0) |
A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(1));
OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(31) |
A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(0));
OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(0.0));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(1.0));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(0.0));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(1.0));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE |
A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE |
A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE |
A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE |
A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
OUT_PKT3(ring, CP_DRAW_INDX_2, 5);
OUT_RING(ring, 0x00000000); /* viz query info. */
OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_IMMEDIATE,
INDEX_SIZE_32_BIT, IGNORE_VISIBILITY));
OUT_RING(ring, 2); /* NumIndices */
OUT_RING(ring, 2);
OUT_RING(ring, 1);
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1);
OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS));
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
OUT_RING(ring, 0x00000000);
OUT_WFI(ring);
OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, 0x00000000);
}
/* transfer from gmem to system memory (ie. normal RAM) */
static void
@ -129,8 +282,8 @@ emit_gmem2mem_surf(struct fd_context *ctx,
A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(psurf->format)));
fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 2,
INDEX_SIZE_IGN, 0, 0, NULL);
fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
}
static void
@ -210,7 +363,7 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
fd3_program_emit(ring, &ctx->solid_prog);
fd3_program_emit(ring, &ctx->solid_prog, false);
fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
{ .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
@ -252,8 +405,8 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
fd3_emit_gmem_restore_tex(ring, psurf);
fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 2,
INDEX_SIZE_IGN, 0, 0, NULL);
fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
}
static void
@ -355,7 +508,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
fd3_program_emit(ring, &ctx->blit_prog);
fd3_program_emit(ring, &ctx->blit_prog, false);
fd3_emit_vertex_bufs(ring, &ctx->blit_prog, (struct fd3_vertex_buf[]) {
{ .prsc = fd3_ctx->blit_texcoord_vbuf, .stride = 8, .format = PIPE_FORMAT_R32G32_FLOAT },
@ -381,27 +534,14 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
}
static void
update_vsc_pipe(struct fd_context *ctx)
patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode)
{
struct fd_ringbuffer *ring = ctx->ring;
int i;
for (i = 0; i < 8; i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
if (!pipe->bo) {
pipe->bo = fd_bo_new(ctx->screen->dev, 0x40000,
DRM_FREEDRENO_GEM_TYPE_KMEM);
}
OUT_PKT0(ring, REG_A3XX_VSC_PIPE(0), 3);
OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(pipe->x) |
A3XX_VSC_PIPE_CONFIG_Y(pipe->y) |
A3XX_VSC_PIPE_CONFIG_W(pipe->w) |
A3XX_VSC_PIPE_CONFIG_H(pipe->h));
OUT_RELOC(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE[i].DATA_ADDRESS */
OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE[i].DATA_LENGTH */
unsigned i;
for (i = 0; i < fd_patch_num_elements(&ctx->draw_patches); i++) {
struct fd_cs_patch *patch = fd_patch_element(&ctx->draw_patches, i);
*patch->cs = patch->val | DRAW(0, 0, 0, vismode);
}
util_dynarray_resize(&ctx->draw_patches, 0);
}
/* for rendering directly to system memory: */
@ -442,6 +582,150 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_RB_MODE_CONTROL_GMEM_BYPASS |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
patch_draws(ctx, IGNORE_VISIBILITY);
}
static void
update_vsc_pipe(struct fd_context *ctx)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
int i;
OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
for (i = 0; i < 8; i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
if (!pipe->bo) {
pipe->bo = fd_bo_new(ctx->screen->dev, 0x40000,
DRM_FREEDRENO_GEM_TYPE_KMEM);
}
OUT_PKT0(ring, REG_A3XX_VSC_PIPE(i), 3);
OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(pipe->x) |
A3XX_VSC_PIPE_CONFIG_Y(pipe->y) |
A3XX_VSC_PIPE_CONFIG_W(pipe->w) |
A3XX_VSC_PIPE_CONFIG_H(pipe->h));
OUT_RELOC(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE[i].DATA_ADDRESS */
OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE[i].DATA_LENGTH */
}
}
static void
emit_binning_pass(struct fd_context *ctx)
{
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_ringbuffer *ring = ctx->ring;
int i;
if (ctx->screen->gpu_id == 320) {
emit_binning_workaround(ctx);
OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
OUT_RING(ring, 0x00007fff);
}
OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
OUT_RING(ring, A3XX_VSC_BIN_CONTROL_BINNING_ENABLE);
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) |
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w));
/* setup scissor/offset for whole screen: */
OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) |
A3XX_RB_WINDOW_OFFSET_Y(0));
OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
OUT_RING(ring, A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE);
OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |
A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
for (i = 0; i < 4; i++) {
OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(0) |
A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |
A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0));
}
OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(1) |
A3XX_PC_VSTREAM_CONTROL_N(0));
/* emit IB to binning drawcmds: */
OUT_IB(ring, ctx->binning_start, ctx->binning_end);
/* and then put stuff back the way it was: */
OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
OUT_RING(ring, 0x00000000);
OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
OUT_RING(ring, A3XX_SP_SP_CTRL_REG_RESOLVE |
A3XX_SP_SP_CTRL_REG_CONSTMODE(1) |
A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
A3XX_SP_SP_CTRL_REG_L0MODE(0));
OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
OUT_RING(ring, 0x00000000);
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w));
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, CACHE_FLUSH);
if (ctx->screen->gpu_id == 320) {
/* dummy-draw workaround: */
OUT_PKT3(ring, CP_DRAW_INDX, 3);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
INDEX_SIZE_IGN, IGNORE_VISIBILITY));
OUT_RING(ring, 0); /* NumIndices */
}
OUT_PKT3(ring, CP_NOP, 4);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_WFI(ring);
if (ctx->screen->gpu_id == 320) {
emit_binning_workaround(ctx);
}
}
/* before first tile */
@ -461,6 +745,18 @@ fd3_emit_tile_init(struct fd_context *ctx)
A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
update_vsc_pipe(ctx);
if (use_hw_binning(ctx)) {
/* mark the end of the binning cmds: */
fd_ringmarker_mark(ctx->binning_end);
/* emit hw binning pass: */
emit_binning_pass(ctx);
patch_draws(ctx, USE_VISIBILITY);
} else {
patch_draws(ctx, IGNORE_VISIBILITY);
}
}
/* before mem2gmem */
@ -472,7 +768,6 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
struct fd_gmem_stateobj *gmem = &ctx->gmem;
uint32_t reg;
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(gmem));
if (pfb->zsbuf) {
@ -499,6 +794,7 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
static void
fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
@ -508,6 +804,32 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
uint32_t x2 = tile->xoff + tile->bin_w - 1;
uint32_t y2 = tile->yoff + tile->bin_h - 1;
if (use_hw_binning(ctx)) {
struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p];
assert(pipe->w * pipe->h);
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, HLSQ_FLUSH);
OUT_WFI(ring);
OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) |
A3XX_PC_VSTREAM_CONTROL_N(tile->n));
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, CACHE_FLUSH);
OUT_PKT3(ring, CP_SET_BIN_DATA, 2);
OUT_RELOC(ring, pipe->bo, 0, 0, 0); /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */
OUT_RELOC(ring, fd3_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <- VSC_SIZE_ADDRESS + (p * 4) */
(tile->p * 4), 0, 0);
} else {
OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
OUT_RING(ring, 0x00000000);
}
OUT_PKT3(ring, CP_SET_BIN, 3);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));

View File

@ -36,6 +36,7 @@
#include "fd3_program.h"
#include "fd3_compiler.h"
#include "fd3_emit.h"
#include "fd3_texture.h"
#include "fd3_util.h"
@ -175,9 +176,9 @@ fd3_vp_state_bind(struct pipe_context *pctx, void *hwcso)
}
static void
emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so)
emit_shader(struct fd_ringbuffer *ring, const struct fd3_shader_stateobj *so)
{
struct ir3_shader_info *si = &so->info;
const struct ir3_shader_info *si = &so->info;
enum adreno_state_block sb;
enum adreno_state_src src;
uint32_t i, sz, *bin;
@ -216,7 +217,7 @@ emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so)
}
static int
find_output(struct fd3_shader_stateobj *so, fd3_semantic semantic)
find_output(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
{
int j;
for (j = 0; j < so->outputs_count; j++)
@ -227,14 +228,21 @@ find_output(struct fd3_shader_stateobj *so, fd3_semantic semantic)
void
fd3_program_emit(struct fd_ringbuffer *ring,
struct fd_program_stateobj *prog)
struct fd_program_stateobj *prog, bool binning)
{
struct fd3_shader_stateobj *vp = prog->vp;
struct fd3_shader_stateobj *fp = prog->fp;
struct ir3_shader_info *vsi = &vp->info;
struct ir3_shader_info *fsi = &fp->info;
const struct fd3_shader_stateobj *vp = prog->vp;
const struct fd3_shader_stateobj *fp = prog->fp;
const struct ir3_shader_info *vsi = &vp->info;
const struct ir3_shader_info *fsi = &fp->info;
int i;
if (binning) {
/* use dummy stateobj to simplify binning vs non-binning: */
static const struct fd3_shader_stateobj binning_fp = {};
fp = &binning_fp;
fsi = &fp->info;
}
/* we could probably divide this up into things that need to be
* emitted if frag-prog is dirty vs if vert-prog is dirty..
*/
@ -260,11 +268,9 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) |
COND(binning, A3XX_SP_SP_CTRL_REG_BINNING) |
A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
// XXX "resolve" (?) bit set on gmem->mem pass..
// COND(!uniforms, A3XX_SP_SP_CTRL_REG_RESOLVE) |
// XXX sometimes 0, sometimes 1:
A3XX_SP_SP_CTRL_REG_LOMODE(1));
A3XX_SP_SP_CTRL_REG_L0MODE(0));
OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen));
@ -272,6 +278,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
A3XX_SP_VS_CTRL_REG0_CACHEINVALID |
A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
@ -323,28 +330,38 @@ fd3_program_emit(struct fd_ringbuffer *ring,
A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));
if (binning) {
OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
OUT_RING(ring, 0x00000000);
OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen));
OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fsi->max_const, 0)) |
A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER));
OUT_RING(ring, 0x00000000);
} else {
OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));
OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
A3XX_SP_FS_CTRL_REG0_CACHEINVALID |
A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen));
OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fsi->max_const, 0)) |
A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
}
OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
OUT_RING(ring, 0x00000000); /* SP_FS_FLAT_SHAD_MODE_REG_0 */
@ -360,24 +377,31 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
A3XX_VPC_ATTR_THRDASSIGN(1) |
A3XX_VPC_ATTR_LMSIZE(1));
OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
if (binning) {
OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) |
A3XX_VPC_ATTR_LMSIZE(1));
OUT_RING(ring, 0x00000000);
} else {
OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
A3XX_VPC_ATTR_THRDASSIGN(1) |
A3XX_VPC_ATTR_LMSIZE(1));
OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
OUT_RING(ring, fp->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
OUT_RING(ring, fp->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
OUT_RING(ring, fp->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
OUT_RING(ring, fp->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
OUT_RING(ring, fp->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
OUT_RING(ring, fp->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
OUT_RING(ring, fp->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
OUT_RING(ring, fp->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
OUT_RING(ring, fp->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
OUT_RING(ring, fp->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
OUT_RING(ring, fp->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
OUT_RING(ring, fp->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
OUT_RING(ring, fp->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
OUT_RING(ring, fp->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
OUT_RING(ring, fp->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
OUT_RING(ring, fp->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
}
OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
@ -388,10 +412,12 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
emit_shader(ring, fp);
if (!binning) {
emit_shader(ring, fp);
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
}
OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(vp->total_in) |

View File

@ -117,7 +117,7 @@ struct fd3_shader_stateobj {
};
void fd3_program_emit(struct fd_ringbuffer *ring,
struct fd_program_stateobj *prog);
struct fd_program_stateobj *prog, bool binning);
void fd3_prog_init(struct pipe_context *pctx);
void fd3_prog_fini(struct pipe_context *pctx);

View File

@ -34,16 +34,11 @@
#include "freedreno_gmem.h"
#include "freedreno_util.h"
static void
fd_context_next_rb(struct pipe_context *pctx)
static struct fd_ringbuffer *next_rb(struct fd_context *ctx)
{
struct fd_context *ctx = fd_context(pctx);
struct fd_ringbuffer *ring;
uint32_t ts;
fd_ringmarker_del(ctx->draw_start);
fd_ringmarker_del(ctx->draw_end);
/* grab next ringbuffer: */
ring = ctx->rings[(ctx->rings_idx++) % ARRAY_SIZE(ctx->rings)];
@ -56,10 +51,36 @@ fd_context_next_rb(struct pipe_context *pctx)
fd_ringbuffer_reset(ring);
return ring;
}
static void
fd_context_next_rb(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
struct fd_ringbuffer *ring;
fd_ringmarker_del(ctx->draw_start);
fd_ringmarker_del(ctx->draw_end);
ring = next_rb(ctx);
ctx->draw_start = fd_ringmarker_new(ring);
ctx->draw_end = fd_ringmarker_new(ring);
fd_ringbuffer_set_parent(ring, NULL);
ctx->ring = ring;
fd_ringmarker_del(ctx->binning_start);
fd_ringmarker_del(ctx->binning_end);
ring = next_rb(ctx);
ctx->binning_start = fd_ringmarker_new(ring);
ctx->binning_end = fd_ringmarker_new(ring);
fd_ringbuffer_set_parent(ring, ctx->ring);
ctx->binning_ring = ring;
}
/* emit accumulated render cmds, needed for example if render target has
@ -121,6 +142,10 @@ fd_context_destroy(struct pipe_context *pctx)
DBG("");
util_slab_destroy(&ctx->transfer_pool);
util_dynarray_fini(&ctx->draw_patches);
if (ctx->blitter)
util_blitter_destroy(ctx->blitter);
@ -129,7 +154,11 @@ fd_context_destroy(struct pipe_context *pctx)
fd_ringmarker_del(ctx->draw_start);
fd_ringmarker_del(ctx->draw_end);
fd_ringbuffer_del(ctx->ring);
fd_ringmarker_del(ctx->binning_start);
fd_ringmarker_del(ctx->binning_end);
for (i = 0; i < ARRAY_SIZE(ctx->rings); i++)
fd_ringbuffer_del(ctx->rings[i]);
for (i = 0; i < ARRAY_SIZE(ctx->pipe); i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
@ -176,6 +205,8 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
fd_context_next_rb(pctx);
fd_reset_rmw_state(ctx);
util_dynarray_init(&ctx->draw_patches);
util_slab_create(&ctx->transfer_pool, sizeof(struct pipe_transfer),
16, UTIL_SLAB_SINGLETHREADED);

View File

@ -111,7 +111,7 @@ struct fd_context {
*/
enum {
/* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
FD_BUFFER_COLOR = PIPE_CLEAR_COLOR,
FD_BUFFER_COLOR = PIPE_CLEAR_COLOR0,
FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
@ -148,9 +148,14 @@ struct fd_context {
struct fd_ringbuffer *rings[4];
unsigned rings_idx;
/* normal draw/clear cmds: */
struct fd_ringbuffer *ring;
struct fd_ringmarker *draw_start, *draw_end;
/* binning pass draw/clear cmds: */
struct fd_ringbuffer *binning_ring;
struct fd_ringmarker *binning_start, *binning_end;
/* Keep track if WAIT_FOR_IDLE is needed for registers we need
* to update via RMW:
*/
@ -165,6 +170,11 @@ struct fd_context {
uint32_t rbrc_draw;
} rmw;
/* Keep track of DRAW initiators that need to be patched up depending
* on whether we using binning or not:
*/
struct util_dynarray draw_patches;
struct pipe_scissor_state scissor;
/* we don't have a disable/enable bit for scissor, so instead we keep

View File

@ -54,7 +54,9 @@ size2indextype(unsigned index_size)
/* this is same for a2xx/a3xx, so split into helper: */
void
fd_draw_emit(struct fd_context *ctx, const struct pipe_draw_info *info)
fd_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum pc_di_vis_cull_mode vismode,
const struct pipe_draw_info *info)
{
struct pipe_index_buffer *idx = &ctx->indexbuf;
struct fd_bo *idx_bo = NULL;
@ -78,8 +80,8 @@ fd_draw_emit(struct fd_context *ctx, const struct pipe_draw_info *info)
src_sel = DI_SRC_SEL_AUTO_INDEX;
}
fd_draw(ctx, ctx->primtypes[info->mode], src_sel, info->count,
idx_type, idx_size, idx_offset, idx_bo);
fd_draw(ctx, ring, ctx->primtypes[info->mode], vismode, src_sel,
info->count, idx_type, idx_size, idx_offset, idx_bo);
}
static void
@ -180,6 +182,7 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
ctx->clear(ctx, buffers, color, depth, stencil);
ctx->dirty |= FD_DIRTY_ZSA |
FD_DIRTY_VIEWPORT |
FD_DIRTY_RASTERIZER |
FD_DIRTY_SAMPLE_MASK |
FD_DIRTY_PROG |

View File

@ -38,19 +38,21 @@
struct fd_ringbuffer;
void fd_draw_emit(struct fd_context *ctx, const struct pipe_draw_info *info);
void fd_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum pc_di_vis_cull_mode vismode,
const struct pipe_draw_info *info);
void fd_draw_init(struct pipe_context *pctx);
static inline void
fd_draw(struct fd_context *ctx, enum pc_di_primtype primtype,
fd_draw(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum pc_di_primtype primtype,
enum pc_di_vis_cull_mode vismode,
enum pc_di_src_sel src_sel, uint32_t count,
enum pc_di_index_size idx_type,
uint32_t idx_size, uint32_t idx_offset,
struct fd_bo *idx_bo)
{
struct fd_ringbuffer *ring = ctx->ring;
/* for debug after a lock up, write a unique counter value
* to scratch7 for each draw, to make it easier to match up
* register dumps to cmdstream. The combination of IB
@ -64,7 +66,7 @@ fd_draw(struct fd_context *ctx, enum pc_di_primtype primtype,
OUT_PKT3(ring, CP_DRAW_INDX, 3);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
INDEX_SIZE_IGN, IGNORE_VISIBILITY));
INDEX_SIZE_IGN, USE_VISIBILITY));
OUT_RING(ring, 0); /* NumIndices */
/* ugg, hard-code register offset to avoid pulling in the
@ -76,8 +78,15 @@ fd_draw(struct fd_context *ctx, enum pc_di_primtype primtype,
OUT_PKT3(ring, CP_DRAW_INDX, idx_bo ? 5 : 3);
OUT_RING(ring, 0x00000000); /* viz query info. */
OUT_RING(ring, DRAW(primtype, src_sel,
idx_type, IGNORE_VISIBILITY));
if (vismode == USE_VISIBILITY) {
/* leave vis mode blank for now, it will be patched up when
* we know if we are binning or not
*/
OUT_RINGP(ring, DRAW(primtype, src_sel, idx_type, 0),
&ctx->draw_patches);
} else {
OUT_RING(ring, DRAW(primtype, src_sel, idx_type, vismode));
}
OUT_RING(ring, count); /* NumIndices */
if (idx_bo) {
OUT_RELOC(ring, idx_bo, idx_offset, 0, 0);

View File

@ -85,7 +85,8 @@ calculate_tiles(struct fd_context *ctx)
uint32_t bin_w, bin_h;
uint32_t max_width = bin_width(ctx);
uint32_t cpp = 4;
uint32_t i, j, t, p, n, xoff, yoff;
uint32_t i, j, t, xoff, yoff;
uint32_t tpp_x, tpp_y;
bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL));
if (pfb->cbufs[0])
@ -145,20 +146,65 @@ calculate_tiles(struct fd_context *ctx)
gmem->width = width;
gmem->height = height;
/* Assign tiles and pipes:
* NOTE we currently take a rather simplistic approach of
* mapping rows of tiles to a pipe. At some point it might
* be worth playing with different strategies and seeing if
* that makes much impact on performance.
/*
* Assign tiles and pipes:
*
* At some point it might be worth playing with different
* strategies and seeing if that makes much impact on
* performance.
*/
t = p = n = 0;
#define div_round_up(v, a) (((v) + (a) - 1) / (a))
/* figure out number of tiles per pipe: */
tpp_x = tpp_y = 1;
while (div_round_up(nbins_y, tpp_y) > 8)
tpp_y += 2;
while ((div_round_up(nbins_y, tpp_y) *
div_round_up(nbins_x, tpp_x)) > 8)
tpp_x += 1;
/* configure pipes: */
xoff = yoff = 0;
for (i = 0; i < ARRAY_SIZE(ctx->pipe); i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
if (xoff >= nbins_x) {
xoff = 0;
yoff += tpp_y;
}
if (yoff >= nbins_y) {
break;
}
pipe->x = xoff;
pipe->y = yoff;
pipe->w = MIN2(tpp_x, nbins_x - xoff);
pipe->h = MIN2(tpp_y, nbins_y - yoff);
xoff += tpp_x;
}
for (; i < ARRAY_SIZE(ctx->pipe); i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
pipe->x = pipe->y = pipe->w = pipe->h = 0;
}
#if 0 /* debug */
printf("%dx%d ... tpp=%dx%d\n", nbins_x, nbins_y, tpp_x, tpp_y);
for (i = 0; i < 8; i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
printf("pipe[%d]: %ux%u @ %u,%u\n", i,
pipe->w, pipe->h, pipe->x, pipe->y);
}
#endif
/* configure tiles: */
t = 0;
yoff = miny;
for (i = 0; i < nbins_y; i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[p];
uint32_t bw, bh;
assert(p < ARRAY_SIZE(ctx->pipe));
xoff = minx;
/* clip bin height: */
@ -166,13 +212,20 @@ calculate_tiles(struct fd_context *ctx)
for (j = 0; j < nbins_x; j++) {
struct fd_tile *tile = &ctx->tile[t];
uint32_t n, p;
assert(t < ARRAY_SIZE(ctx->tile));
/* pipe number: */
p = ((i / tpp_y) * div_round_up(nbins_x, tpp_x)) + (j / tpp_x);
/* slot number: */
n = ((i % tpp_y) * tpp_x) + (j % tpp_x);
/* clip bin width: */
bw = MIN2(bin_w, minx + width - xoff);
tile->n = n++;
tile->n = n;
tile->p = p;
tile->bin_w = bw;
tile->bin_h = bh;
@ -184,22 +237,19 @@ calculate_tiles(struct fd_context *ctx)
xoff += bw;
}
/* one pipe per row: */
pipe->x = 0;
pipe->y = i;
pipe->w = nbins_x;
pipe->h = 1;
p++;
n = 0;
yoff += bh;
}
for (; p < ARRAY_SIZE(ctx->pipe); p++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[p];
pipe->x = pipe->y = pipe->w = pipe->h = 0;
#if 0 /* debug */
t = 0;
for (i = 0; i < nbins_y; i++) {
for (j = 0; j < nbins_x; j++) {
struct fd_tile *tile = &ctx->tile[t++];
printf("|p:%u n:%u|", tile->p, tile->n);
}
printf("\n");
}
#endif
}
static void
@ -259,6 +309,7 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
/* mark the end of the clear/draw cmds before emitting per-tile cmds: */
fd_ringmarker_mark(ctx->draw_end);
fd_ringmarker_mark(ctx->binning_end);
if (sysmem) {
DBG("rendering sysmem (%s/%s)",
@ -277,8 +328,9 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
/* GPU executes starting from tile cmds, which IB back to draw cmds: */
fd_ringmarker_flush(ctx->draw_end);
/* mark start for next draw cmds: */
/* mark start for next draw/binning cmds: */
fd_ringmarker_mark(ctx->draw_start);
fd_ringmarker_mark(ctx->binning_start);
fd_reset_rmw_state(ctx);

View File

@ -64,12 +64,15 @@ static const struct debug_named_value debug_options[] = {
{"direct", FD_DBG_DIRECT, "Force inline (SS_DIRECT) state loads"},
{"dbypass", FD_DBG_DBYPASS,"Disable GMEM bypass"},
{"fraghalf", FD_DBG_FRAGHALF, "Use half-precision in fragment shader"},
{"binning", FD_DBG_BINNING, "Enable hw binning"},
{"dbinning", FD_DBG_DBINNING, "Disable hw binning"},
DEBUG_NAMED_VALUE_END
};
DEBUG_GET_ONCE_FLAGS_OPTION(fd_mesa_debug, "FD_MESA_DEBUG", debug_options, 0)
int fd_mesa_debug = 0;
bool fd_binning_enabled = false; /* default to off for now */
static const char *
fd_screen_get_name(struct pipe_screen *pscreen)
@ -386,6 +389,12 @@ fd_screen_create(struct fd_device *dev)
fd_mesa_debug = debug_get_option_fd_mesa_debug();
if (fd_mesa_debug & FD_DBG_BINNING)
fd_binning_enabled = true;
if (fd_mesa_debug & FD_DBG_DBINNING)
fd_binning_enabled = false;
if (!screen)
return NULL;

View File

@ -37,6 +37,7 @@
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_half.h"
#include "util/u_dynarray.h"
#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
@ -52,16 +53,19 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
/* TBD if it is same on a2xx, but for now: */
#define MAX_MIP_LEVELS A3XX_MAX_MIP_LEVELS
#define FD_DBG_MSGS 0x01
#define FD_DBG_DISASM 0x02
#define FD_DBG_DCLEAR 0x04
#define FD_DBG_DGMEM 0x08
#define FD_DBG_DSCIS 0x10
#define FD_DBG_DIRECT 0x20
#define FD_DBG_DBYPASS 0x40
#define FD_DBG_FRAGHALF 0x80
#define FD_DBG_MSGS 0x0001
#define FD_DBG_DISASM 0x0002
#define FD_DBG_DCLEAR 0x0004
#define FD_DBG_DGMEM 0x0008
#define FD_DBG_DSCIS 0x0010
#define FD_DBG_DIRECT 0x0020
#define FD_DBG_DBYPASS 0x0040
#define FD_DBG_FRAGHALF 0x0080
#define FD_DBG_BINNING 0x0100
#define FD_DBG_DBINNING 0x0200
extern int fd_mesa_debug;
extern bool fd_binning_enabled;
#define DBG(fmt, ...) \
do { if (fd_mesa_debug & FD_DBG_MSGS) \
@ -87,6 +91,13 @@ static inline uint32_t DRAW(enum pc_di_primtype prim_type,
(1 << 14);
}
/* for tracking cmdstream positions that need to be patched: */
struct fd_cs_patch {
uint32_t *cs;
uint32_t val;
};
#define fd_patch_num_elements(buf) ((buf)->size / sizeof(struct fd_cs_patch))
#define fd_patch_element(buf, i) util_dynarray_element(buf, struct fd_cs_patch, i)
static inline enum pipe_format
pipe_surface_format(struct pipe_surface *psurf)
@ -110,6 +121,21 @@ OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
*(ring->cur++) = data;
}
/* like OUT_RING() but appends a cmdstream patch point to 'buf' */
static inline void
OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data,
struct util_dynarray *buf)
{
if (LOG_DWORDS) {
DBG("ring[%p]: OUT_RINGP %04x: %08x", ring,
(uint32_t)(ring->cur - ring->last_start), data);
}
util_dynarray_append(buf, struct fd_cs_patch, ((struct fd_cs_patch){
.cs = ring->cur++,
.val = data,
}));
}
static inline void
OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint32_t or, int32_t shift)
@ -132,7 +158,7 @@ OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint32_t or, int32_t shift)
{
if (LOG_DWORDS) {
DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring,
DBG("ring[%p]: OUT_RELOCW %04x: %p+%u << %d", ring,
(uint32_t)(ring->cur - ring->last_start), bo, offset, shift);
}
fd_ringbuffer_reloc(ring, &(struct fd_reloc){