From 3c456cf5832f39761afa5699d062a8639bd503af Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 6 Jun 2019 09:55:33 -0700 Subject: [PATCH] freedreno/a6xx: re-arrange program stateobj/group Split out a separate program config state group to run early before the other groups. This seems to help with intermittent "missed tiles" (although I had assumed that was a mem2gmem issue), or at least I can't reproduce that issue with this patch, but can without. It has the benefit of HLSQ_VS_CNTL.CONSTLEN matching for VS and BS. Signed-off-by: Rob Clark Reviewed-by: Kristian H. Kristensen --- src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 4 + src/gallium/drivers/freedreno/a6xx/fd6_emit.h | 1 + .../drivers/freedreno/a6xx/fd6_program.c | 82 ++++++++++++------- .../drivers/freedreno/a6xx/fd6_program.h | 1 + 4 files changed, 58 insertions(+), 30 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index c9766fd382d..10ebc045503 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -871,6 +871,7 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) } if (dirty & FD_DIRTY_PROG) { + fd6_emit_add_group(emit, prog->config_stateobj, FD6_GROUP_PROG_CONFIG, 0x7); fd6_emit_add_group(emit, prog->stateobj, FD6_GROUP_PROG, 0x6); fd6_emit_add_group(emit, prog->binning_stateobj, FD6_GROUP_PROG_BINNING, 0x1); @@ -1029,6 +1030,9 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) OUT_PKT4(obj, REG_A6XX_SP_IBO_LO, 2); OUT_RB(obj, state); + /* TODO if we used CP_SET_DRAW_STATE for compute shaders, we could + * de-duplicate this from program->config_stateobj + */ OUT_PKT4(obj, REG_A6XX_SP_IBO_COUNT, 1); OUT_RING(obj, mapping->num_ibo); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index 691467a0786..ba44904ce4b 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ 
b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -43,6 +43,7 @@ struct fd_ringbuffer; * need to be emit'd. */ enum fd6_state_id { + FD6_GROUP_PROG_CONFIG, FD6_GROUP_PROG, FD6_GROUP_PROG_BINNING, FD6_GROUP_LRZ, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index dedd7ffb2f7..76099177030 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -194,6 +194,50 @@ setup_stream_out(struct fd6_program_state *state, const struct ir3_shader_varian COND(tf->ncomp[3] > 0, A6XX_VPC_SO_BUF_CNTL_BUF3); } +static void +setup_config_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *state) +{ + OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); + OUT_RING(ring, 0xff); /* XXX */ + + debug_assert(state->vs->constlen >= state->bs->constlen); + + OUT_PKT4(ring, REG_A6XX_HLSQ_VS_CNTL, 4); + OUT_RING(ring, A6XX_HLSQ_VS_CNTL_CONSTLEN(align(state->vs->constlen, 4)) | + A6XX_HLSQ_VS_CNTL_ENABLED); + OUT_RING(ring, A6XX_HLSQ_HS_CNTL_CONSTLEN(0)); + OUT_RING(ring, A6XX_HLSQ_DS_CNTL_CONSTLEN(0)); + OUT_RING(ring, A6XX_HLSQ_GS_CNTL_CONSTLEN(0)); + + OUT_PKT4(ring, REG_A6XX_HLSQ_FS_CNTL, 1); + OUT_RING(ring, A6XX_HLSQ_FS_CNTL_CONSTLEN(align(state->fs->constlen, 4)) | + A6XX_HLSQ_FS_CNTL_ENABLED); + + OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 1); + OUT_RING(ring, COND(state->vs, A6XX_SP_VS_CONFIG_ENABLED) | + A6XX_SP_VS_CONFIG_NIBO(state->vs->image_mapping.num_ibo) | + A6XX_SP_VS_CONFIG_NTEX(state->vs->num_samp) | + A6XX_SP_VS_CONFIG_NSAMP(state->vs->num_samp)); + + OUT_PKT4(ring, REG_A6XX_SP_FS_CONFIG, 1); + OUT_RING(ring, COND(state->fs, A6XX_SP_FS_CONFIG_ENABLED) | + A6XX_SP_FS_CONFIG_NIBO(state->fs->image_mapping.num_ibo) | + A6XX_SP_FS_CONFIG_NTEX(state->fs->num_samp) | + A6XX_SP_FS_CONFIG_NSAMP(state->fs->num_samp)); + + OUT_PKT4(ring, REG_A6XX_SP_HS_CONFIG, 1); + OUT_RING(ring, COND(false, A6XX_SP_HS_CONFIG_ENABLED)); + + OUT_PKT4(ring, 
REG_A6XX_SP_DS_CONFIG, 1); + OUT_RING(ring, COND(false, A6XX_SP_DS_CONFIG_ENABLED)); + + OUT_PKT4(ring, REG_A6XX_SP_GS_CONFIG, 1); + OUT_RING(ring, COND(false, A6XX_SP_GS_CONFIG_ENABLED)); + + OUT_PKT4(ring, REG_A6XX_SP_IBO_COUNT, 1); + OUT_RING(ring, state->fs->image_mapping.num_ibo); +} + #define VALIDREG(r) ((r) != regid(63,0)) #define CONDREG(r, val) COND(VALIDREG(r), (val)) @@ -271,29 +315,22 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *state, * emitted if frag-prog is dirty vs if vert-prog is dirty.. */ - OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 2); - OUT_RING(ring, COND(vs, A6XX_SP_VS_CONFIG_ENABLED) | - A6XX_SP_VS_CONFIG_NIBO(vs->image_mapping.num_ibo) | - A6XX_SP_VS_CONFIG_NTEX(vs->num_samp) | - A6XX_SP_VS_CONFIG_NSAMP(vs->num_samp)); /* SP_VS_CONFIG */ + OUT_PKT4(ring, REG_A6XX_SP_VS_INSTRLEN, 1); OUT_RING(ring, vs->instrlen); /* SP_VS_INSTRLEN */ OUT_PKT4(ring, REG_A6XX_SP_HS_UNKNOWN_A831, 1); OUT_RING(ring, 0); - OUT_PKT4(ring, REG_A6XX_SP_HS_CONFIG, 2); - OUT_RING(ring, 0); /* SP_HS_CONFIG */ + OUT_PKT4(ring, REG_A6XX_SP_HS_INSTRLEN, 1); OUT_RING(ring, 0); /* SP_HS_INSTRLEN */ - OUT_PKT4(ring, REG_A6XX_SP_DS_CONFIG, 2); - OUT_RING(ring, 0); /* SP_DS_CONFIG */ + OUT_PKT4(ring, REG_A6XX_SP_DS_INSTRLEN, 1); OUT_RING(ring, 0); /* SP_DS_INSTRLEN */ OUT_PKT4(ring, REG_A6XX_SP_GS_UNKNOWN_A871, 1); OUT_RING(ring, 0); - OUT_PKT4(ring, REG_A6XX_SP_GS_CONFIG, 2); - OUT_RING(ring, 0); /* SP_GS_CONFIG */ + OUT_PKT4(ring, REG_A6XX_SP_GS_INSTRLEN, 1); OUT_RING(ring, 0); /* SP_GS_INSTRLEN */ /* I believe this is related to pre-dispatch texture fetch.. 
we probably @@ -308,11 +345,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *state, OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_AB00, 1); OUT_RING(ring, 0x5); - OUT_PKT4(ring, REG_A6XX_SP_FS_CONFIG, 2); - OUT_RING(ring, COND(fs, A6XX_SP_FS_CONFIG_ENABLED) | - A6XX_SP_FS_CONFIG_NIBO(fs->image_mapping.num_ibo) | - A6XX_SP_FS_CONFIG_NTEX(fs->num_samp) | - A6XX_SP_FS_CONFIG_NSAMP(fs->num_samp)); /* SP_FS_CONFIG */ + OUT_PKT4(ring, REG_A6XX_SP_FS_INSTRLEN, 1); OUT_RING(ring, fs->instrlen); /* SP_FS_INSTRLEN */ OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL0, 1); @@ -320,17 +353,6 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *state, A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(smask_regid) | 0xfc000000); - OUT_PKT4(ring, REG_A6XX_HLSQ_VS_CNTL, 4); - OUT_RING(ring, A6XX_HLSQ_VS_CNTL_CONSTLEN(align(vs->constlen, 4)) | - A6XX_HLSQ_VS_CNTL_ENABLED); - OUT_RING(ring, A6XX_HLSQ_HS_CNTL_CONSTLEN(0)); /* HLSQ_HS_CONSTLEN */ - OUT_RING(ring, A6XX_HLSQ_DS_CNTL_CONSTLEN(0)); /* HLSQ_DS_CONSTLEN */ - OUT_RING(ring, A6XX_HLSQ_GS_CNTL_CONSTLEN(0)); /* HLSQ_GS_CONSTLEN */ - - OUT_PKT4(ring, REG_A6XX_HLSQ_FS_CNTL, 1); - OUT_RING(ring, A6XX_HLSQ_FS_CNTL_CONSTLEN(align(fs->constlen, 4)) | - A6XX_HLSQ_FS_CNTL_ENABLED); - OUT_PKT4(ring, REG_A6XX_SP_VS_CTRL_REG0, 1); OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_THREADSIZE(fssz) | A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) | @@ -460,9 +482,6 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *state, OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A982, 1); OUT_RING(ring, 0); /* XXX */ - OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); - OUT_RING(ring, 0xff); /* XXX */ - OUT_PKT4(ring, REG_A6XX_VPC_GS_SIV_CNTL, 1); OUT_RING(ring, 0x0000ffff); /* XXX */ @@ -677,9 +696,11 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs, state->bs = bs; state->vs = vs; state->fs = fs; + state->config_stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000); state->binning_stateobj = 
fd_ringbuffer_new_object(ctx->pipe, 0x1000); state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000); + setup_config_stateobj(state->config_stateobj, state); setup_stateobj(state->binning_stateobj, state, key, true); setup_stateobj(state->stateobj, state, key, false); @@ -692,6 +713,7 @@ fd6_program_destroy(void *data, struct ir3_program_state *state) struct fd6_program_state *so = fd6_program_state(state); fd_ringbuffer_del(so->stateobj); fd_ringbuffer_del(so->binning_stateobj); + fd_ringbuffer_del(so->config_stateobj); free(so); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.h b/src/gallium/drivers/freedreno/a6xx/fd6_program.h index 8bc95f5e940..76ef726b40e 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.h @@ -48,6 +48,7 @@ struct fd6_program_state { struct ir3_shader_variant *bs; /* binning pass vs */ struct ir3_shader_variant *vs; struct ir3_shader_variant *fs; + struct fd_ringbuffer *config_stateobj; struct fd_ringbuffer *binning_stateobj; struct fd_ringbuffer *stateobj;