freedreno: don't patch and re-emit same shader as much

New textures or vertex buffers don't always require patching and
re-emitting the shaders.  So do a better job of figuring out when we
actually have to patch the shader.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
Rob Clark 2013-04-25 11:17:02 -04:00
parent 578987ce1c
commit f706d4d340
5 changed files with 65 additions and 64 deletions

View File

@ -144,23 +144,24 @@ struct fd_context {
/* which state objects need to be re-emitted: */
enum {
FD_DIRTY_BLEND = (1 << 0),
FD_DIRTY_RASTERIZER = (1 << 1),
FD_DIRTY_ZSA = (1 << 2),
FD_DIRTY_FRAGTEX = (1 << 3),
FD_DIRTY_VERTTEX = (1 << 4),
FD_DIRTY_PROG = (1 << 5),
FD_DIRTY_VTX = (1 << 6),
FD_DIRTY_BLEND_COLOR = (1 << 7),
FD_DIRTY_STENCIL_REF = (1 << 8),
FD_DIRTY_SAMPLE_MASK = (1 << 9),
FD_DIRTY_BLEND = (1 << 0),
FD_DIRTY_RASTERIZER = (1 << 1),
FD_DIRTY_ZSA = (1 << 2),
FD_DIRTY_FRAGTEX = (1 << 3),
FD_DIRTY_VERTTEX = (1 << 4),
FD_DIRTY_TEXSTATE = (1 << 5),
FD_DIRTY_PROG = (1 << 6),
FD_DIRTY_BLEND_COLOR = (1 << 7),
FD_DIRTY_STENCIL_REF = (1 << 8),
FD_DIRTY_SAMPLE_MASK = (1 << 9),
FD_DIRTY_FRAMEBUFFER = (1 << 10),
FD_DIRTY_STIPPLE = (1 << 12),
FD_DIRTY_STIPPLE = (1 << 11),
FD_DIRTY_VIEWPORT = (1 << 12),
FD_DIRTY_CONSTBUF = (1 << 13),
FD_DIRTY_VERTEXBUF = (1 << 14),
FD_DIRTY_INDEXBUF = (1 << 15),
FD_DIRTY_SCISSOR = (1 << 16),
FD_DIRTY_VTXSTATE = (1 << 14),
FD_DIRTY_VTXBUF = (1 << 15),
FD_DIRTY_INDEXBUF = (1 << 16),
FD_DIRTY_SCISSOR = (1 << 17),
} dirty;
struct fd_blend_stateobj *blend;

View File

@ -275,11 +275,11 @@ fd_program_validate(struct fd_context *ctx)
prog->dirty = 0;
/* if necessary, fix up vertex fetch instructions: */
if (ctx->dirty & (FD_DIRTY_VTX | FD_DIRTY_VERTEXBUF | FD_DIRTY_PROG))
if (ctx->dirty & (FD_DIRTY_VTXSTATE | FD_DIRTY_PROG))
patch_vtx_fetches(ctx, prog->vp, ctx->vtx);
/* if necessary, fix up texture fetch instructions: */
if (ctx->dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX | FD_DIRTY_PROG)) {
if (ctx->dirty & (FD_DIRTY_TEXSTATE | FD_DIRTY_PROG)) {
patch_tex_fetches(ctx, prog->vp, &ctx->verttex);
patch_tex_fetches(ctx, prog->fp, &ctx->fragtex);
}
@ -400,9 +400,7 @@ create_blit_vp(void)
ir2_reg_create(instr, 1, NULL, 0);
ir2_reg_create(instr, 1, NULL, 0);
return assemble(so);
}
/* Creates shader:

View File

@ -172,11 +172,27 @@ fd_set_vertex_buffers(struct pipe_context *pctx,
{
struct fd_context *ctx = fd_context(pctx);
struct fd_vertexbuf_stateobj *so = &ctx->vertexbuf;
int i;
/* on a2xx, pitch is encoded in the vtx fetch instruction, so
* we need to mark VTXSTATE as dirty as well to trigger patching
* and re-emitting the vtx shader:
*/
for (i = 0; i < count; i++) {
bool new_enabled = vb && (vb[i].buffer || vb[i].user_buffer);
bool old_enabled = so->vb[i].buffer || so->vb[i].user_buffer;
uint32_t new_stride = vb ? vb[i].stride : 0;
uint32_t old_stride = so->vb[i].stride;
if ((new_enabled != old_enabled) || (new_stride != old_stride)) {
ctx->dirty |= FD_DIRTY_VTXSTATE;
break;
}
}
util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, start_slot, count);
so->count = util_last_bit(so->enabled_mask);
ctx->dirty |= FD_DIRTY_VERTEXBUF;
ctx->dirty |= FD_DIRTY_VTXBUF;
}
static void
@ -444,7 +460,7 @@ fd_state_emit(struct pipe_context *pctx, uint32_t dirty)
A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
}
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTX | FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX)) {
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE)) {
fd_program_validate(ctx);
fd_program_emit(ring, &ctx->prog);
}

View File

@ -185,6 +185,14 @@ fd_fragtex_sampler_states_bind(struct pipe_context *pctx,
unsigned nr, void **hwcso)
{
struct fd_context *ctx = fd_context(pctx);
/* on a2xx, since there is a flat address space for textures/samplers,
* a change in # of fragment textures/samplers will trigger patching and
* re-emitting the vertex shader:
*/
if (nr != ctx->fragtex.num_samplers)
ctx->dirty |= FD_DIRTY_TEXSTATE;
bind_sampler_states(&ctx->fragtex, nr, hwcso);
ctx->dirty |= FD_DIRTY_FRAGTEX;
}
@ -195,6 +203,14 @@ fd_fragtex_set_sampler_views(struct pipe_context *pctx, unsigned nr,
struct pipe_sampler_view **views)
{
struct fd_context *ctx = fd_context(pctx);
/* on a2xx, since there is a flat address space for textures/samplers,
* a change in # of fragment textures/samplers will trigger patching and
* re-emitting the vertex shader:
*/
if (nr != ctx->fragtex.num_textures)
ctx->dirty |= FD_DIRTY_TEXSTATE;
set_sampler_views(&ctx->fragtex, nr, views);
ctx->dirty |= FD_DIRTY_FRAGTEX;
}
@ -218,55 +234,24 @@ fd_verttex_set_sampler_views(struct pipe_context *pctx, unsigned nr,
ctx->dirty |= FD_DIRTY_VERTTEX;
}
/* Report whether two sampler slots describe the same texture binding.
 *
 * Returns false if either samp_id is not below its stateobj's
 * num_samplers.  Returns true if both arguments name the very same slot
 * of the very same stateobj.  Otherwise returns true only when both slots
 * point at the same ->texture and their sampler ->base state compares
 * equal byte-for-byte.
 *
 * NOTE(review): textures[] is indexed with an id that was only checked
 * against num_samplers, and neither the textures[] nor the samplers[]
 * entry is checked for NULL before being dereferenced -- presumably the
 * callers guarantee both; confirm.
 */
static bool
tex_cmp(struct fd_texture_stateobj *tex1, unsigned samp_id1,
struct fd_texture_stateobj *tex2, unsigned samp_id2)
{
/* an out-of-range slot never matches anything: */
if ((samp_id1 >= tex1->num_samplers) ||
(samp_id2 >= tex2->num_samplers))
return false;
/* same slot of the same stateobj: no need to compare contents */
if ((tex1 == tex2) && (samp_id1 == samp_id2))
return true;
/* different underlying texture -> different binding: */
if (tex1->textures[samp_id1]->texture != tex2->textures[samp_id2]->texture)
return false;
/* same texture, but the sampler state must match as well: */
if (memcmp(&tex1->samplers[samp_id1]->base, &tex2->samplers[samp_id2]->base,
sizeof(tex1->samplers[samp_id1]->base)))
return false;
return true;
}
/* Map a gallium sampler-id to a hw const-idx.  Adreno uses a flat address
* space of samplers (const-idx), so the gallium sampler-id, which is
* per-shader, has to be mapped into a global const-idx space.
*
* Fragment shader samplers map directly to const-idx, and vertex shader
* samplers are offset by the number of fragment shader samplers.  If the
* number of fragment shader samplers changes, this shifts the vertex
* shader indexes.
*
* TODO maybe we can number frag shader samplers 0..N and vert shader
* samplers N..0 to avoid this?
*/
unsigned
fd_get_const_idx(struct fd_context *ctx, struct fd_texture_stateobj *tex,
unsigned samp_id)
{
/* const_idx counts how many sampler slots have been visited so far */
unsigned i, const_idx = 0;
/* TODO maybe worth having some sort of cache, because we need to
 * do this loop thru all the samplers both when patching shaders
 * and also when emitting sampler state..
 */
/* search the vertex samplers first; a match returns the running count: */
for (i = 0; i < ctx->verttex.num_samplers; i++) {
if (tex_cmp(&ctx->verttex, i, tex, samp_id))
return const_idx;
const_idx++;
}
/* then the fragment samplers, continuing the same running count: */
for (i = 0; i < ctx->fragtex.num_samplers; i++) {
if (tex_cmp(&ctx->fragtex, i, tex, samp_id))
return const_idx;
const_idx++;
}
/* no slot matched: the result is the total number of slots visited */
return const_idx;
/* NOTE(review): the three statements below follow an unconditional
 * return and cannot execute as written.  They implement a direct
 * mapping (fragment id unchanged, any other stateobj offset by
 * fragtex.num_samplers), whereas the loops above number the vertex
 * samplers first.  This looks like the old and new bodies of a diff
 * whose +/- markers were lost -- confirm which body is meant to remain.
 */
if (tex == &ctx->fragtex)
return samp_id;
return samp_id + ctx->fragtex.num_samplers;
}
void

View File

@ -65,7 +65,7 @@ fd_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
{
struct fd_context *ctx = fd_context(pctx);
ctx->vtx = hwcso;
ctx->dirty |= FD_DIRTY_VTX;
ctx->dirty |= FD_DIRTY_VTXSTATE;
}
static void
@ -193,9 +193,10 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
/* and any buffers used, need to be resolved: */
ctx->resolve |= buffers;
fd_state_emit(pctx, ctx->dirty);
if (ctx->dirty & FD_DIRTY_VTXBUF)
emit_vertexbufs(ctx);
emit_vertexbufs(ctx, info->count);
fd_state_emit(pctx, ctx->dirty);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));