freedreno: don't patch and re-emit same shader as much
New textures or vertex buffers don't always require patching and re-emitting the shaders, so do a better job of figuring out when we actually have to patch the shader.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
parent
578987ce1c
commit
f706d4d340
|
@ -144,23 +144,24 @@ struct fd_context {
|
|||
|
||||
/* which state objects need to be re-emit'd: */
|
||||
enum {
|
||||
FD_DIRTY_BLEND = (1 << 0),
|
||||
FD_DIRTY_RASTERIZER = (1 << 1),
|
||||
FD_DIRTY_ZSA = (1 << 2),
|
||||
FD_DIRTY_FRAGTEX = (1 << 3),
|
||||
FD_DIRTY_VERTTEX = (1 << 4),
|
||||
FD_DIRTY_PROG = (1 << 5),
|
||||
FD_DIRTY_VTX = (1 << 6),
|
||||
FD_DIRTY_BLEND_COLOR = (1 << 7),
|
||||
FD_DIRTY_STENCIL_REF = (1 << 8),
|
||||
FD_DIRTY_SAMPLE_MASK = (1 << 9),
|
||||
FD_DIRTY_BLEND = (1 << 0),
|
||||
FD_DIRTY_RASTERIZER = (1 << 1),
|
||||
FD_DIRTY_ZSA = (1 << 2),
|
||||
FD_DIRTY_FRAGTEX = (1 << 3),
|
||||
FD_DIRTY_VERTTEX = (1 << 4),
|
||||
FD_DIRTY_TEXSTATE = (1 << 5),
|
||||
FD_DIRTY_PROG = (1 << 6),
|
||||
FD_DIRTY_BLEND_COLOR = (1 << 7),
|
||||
FD_DIRTY_STENCIL_REF = (1 << 8),
|
||||
FD_DIRTY_SAMPLE_MASK = (1 << 9),
|
||||
FD_DIRTY_FRAMEBUFFER = (1 << 10),
|
||||
FD_DIRTY_STIPPLE = (1 << 12),
|
||||
FD_DIRTY_STIPPLE = (1 << 11),
|
||||
FD_DIRTY_VIEWPORT = (1 << 12),
|
||||
FD_DIRTY_CONSTBUF = (1 << 13),
|
||||
FD_DIRTY_VERTEXBUF = (1 << 14),
|
||||
FD_DIRTY_INDEXBUF = (1 << 15),
|
||||
FD_DIRTY_SCISSOR = (1 << 16),
|
||||
FD_DIRTY_VTXSTATE = (1 << 14),
|
||||
FD_DIRTY_VTXBUF = (1 << 15),
|
||||
FD_DIRTY_INDEXBUF = (1 << 16),
|
||||
FD_DIRTY_SCISSOR = (1 << 17),
|
||||
} dirty;
|
||||
|
||||
struct fd_blend_stateobj *blend;
|
||||
|
|
|
@ -275,11 +275,11 @@ fd_program_validate(struct fd_context *ctx)
|
|||
prog->dirty = 0;
|
||||
|
||||
/* if necessary, fix up vertex fetch instructions: */
|
||||
if (ctx->dirty & (FD_DIRTY_VTX | FD_DIRTY_VERTEXBUF | FD_DIRTY_PROG))
|
||||
if (ctx->dirty & (FD_DIRTY_VTXSTATE | FD_DIRTY_PROG))
|
||||
patch_vtx_fetches(ctx, prog->vp, ctx->vtx);
|
||||
|
||||
/* if necessary, fix up texture fetch instructions: */
|
||||
if (ctx->dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX | FD_DIRTY_PROG)) {
|
||||
if (ctx->dirty & (FD_DIRTY_TEXSTATE | FD_DIRTY_PROG)) {
|
||||
patch_tex_fetches(ctx, prog->vp, &ctx->verttex);
|
||||
patch_tex_fetches(ctx, prog->fp, &ctx->fragtex);
|
||||
}
|
||||
|
@ -400,9 +400,7 @@ create_blit_vp(void)
|
|||
ir2_reg_create(instr, 1, NULL, 0);
|
||||
ir2_reg_create(instr, 1, NULL, 0);
|
||||
|
||||
|
||||
return assemble(so);
|
||||
|
||||
}
|
||||
|
||||
/* Creates shader:
|
||||
|
|
|
@ -172,11 +172,27 @@ fd_set_vertex_buffers(struct pipe_context *pctx,
|
|||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
struct fd_vertexbuf_stateobj *so = &ctx->vertexbuf;
|
||||
int i;
|
||||
|
||||
/* on a2xx, pitch is encoded in the vtx fetch instruction, so
|
||||
* we need to mark VTXSTATE as dirty as well to trigger patching
|
||||
* and re-emitting the vtx shader:
|
||||
*/
|
||||
for (i = 0; i < count; i++) {
|
||||
bool new_enabled = vb && (vb[i].buffer || vb[i].user_buffer);
|
||||
bool old_enabled = so->vb[i].buffer || so->vb[i].user_buffer;
|
||||
uint32_t new_stride = vb ? vb[i].stride : 0;
|
||||
uint32_t old_stride = so->vb[i].stride;
|
||||
if ((new_enabled != old_enabled) || (new_stride != old_stride)) {
|
||||
ctx->dirty |= FD_DIRTY_VTXSTATE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, start_slot, count);
|
||||
so->count = util_last_bit(so->enabled_mask);
|
||||
|
||||
ctx->dirty |= FD_DIRTY_VERTEXBUF;
|
||||
ctx->dirty |= FD_DIRTY_VTXBUF;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -444,7 +460,7 @@ fd_state_emit(struct pipe_context *pctx, uint32_t dirty)
|
|||
A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
|
||||
}
|
||||
|
||||
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTX | FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX)) {
|
||||
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE)) {
|
||||
fd_program_validate(ctx);
|
||||
fd_program_emit(ring, &ctx->prog);
|
||||
}
|
||||
|
|
|
@ -185,6 +185,14 @@ fd_fragtex_sampler_states_bind(struct pipe_context *pctx,
|
|||
unsigned nr, void **hwcso)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
|
||||
/* on a2xx, since there is a flat address space for textures/samplers,
|
||||
* a change in # of fragment textures/samplers will trigger patching and
|
||||
* re-emitting the vertex shader:
|
||||
*/
|
||||
if (nr != ctx->fragtex.num_samplers)
|
||||
ctx->dirty |= FD_DIRTY_TEXSTATE;
|
||||
|
||||
bind_sampler_states(&ctx->fragtex, nr, hwcso);
|
||||
ctx->dirty |= FD_DIRTY_FRAGTEX;
|
||||
}
|
||||
|
@ -195,6 +203,14 @@ fd_fragtex_set_sampler_views(struct pipe_context *pctx, unsigned nr,
|
|||
struct pipe_sampler_view **views)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
|
||||
/* on a2xx, since there is a flat address space for textures/samplers,
|
||||
* a change in # of fragment textures/samplers will trigger patching and
|
||||
* re-emitting the vertex shader:
|
||||
*/
|
||||
if (nr != ctx->fragtex.num_textures)
|
||||
ctx->dirty |= FD_DIRTY_TEXSTATE;
|
||||
|
||||
set_sampler_views(&ctx->fragtex, nr, views);
|
||||
ctx->dirty |= FD_DIRTY_FRAGTEX;
|
||||
}
|
||||
|
@ -218,55 +234,24 @@ fd_verttex_set_sampler_views(struct pipe_context *pctx, unsigned nr,
|
|||
ctx->dirty |= FD_DIRTY_VERTTEX;
|
||||
}
|
||||
|
||||
static bool
|
||||
tex_cmp(struct fd_texture_stateobj *tex1, unsigned samp_id1,
|
||||
struct fd_texture_stateobj *tex2, unsigned samp_id2)
|
||||
{
|
||||
if ((samp_id1 >= tex1->num_samplers) ||
|
||||
(samp_id2 >= tex2->num_samplers))
|
||||
return false;
|
||||
|
||||
if ((tex1 == tex2) && (samp_id1 == samp_id2))
|
||||
return true;
|
||||
|
||||
if (tex1->textures[samp_id1]->texture != tex2->textures[samp_id2]->texture)
|
||||
return false;
|
||||
|
||||
if (memcmp(&tex1->samplers[samp_id1]->base, &tex2->samplers[samp_id2]->base,
|
||||
sizeof(tex1->samplers[samp_id1]->base)))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* map gallium sampler-id to hw const-idx.. adreno uses a flat address
|
||||
* space of samplers (const-idx), so we need to map the gallium sampler-id
|
||||
* which is per-shader to a global const-idx space.
|
||||
*
|
||||
* Fragment shader sampler maps directly to const-idx, and vertex shader
|
||||
* is offset by the # of fragment shader samplers. If the # of fragment
|
||||
* shader samplers changes, this shifts the vertex shader indexes.
|
||||
*
|
||||
* TODO maybe we can do frag shader 0..N and vert shader N..0 to avoid
|
||||
* this??
|
||||
*/
|
||||
unsigned
|
||||
fd_get_const_idx(struct fd_context *ctx, struct fd_texture_stateobj *tex,
|
||||
unsigned samp_id)
|
||||
{
|
||||
unsigned i, const_idx = 0;
|
||||
|
||||
/* TODO maybe worth having some sort of cache, because we need to
|
||||
* do this loop thru all the samplers both when patching shaders
|
||||
* and also when emitting sampler state..
|
||||
*/
|
||||
|
||||
for (i = 0; i < ctx->verttex.num_samplers; i++) {
|
||||
if (tex_cmp(&ctx->verttex, i, tex, samp_id))
|
||||
return const_idx;
|
||||
const_idx++;
|
||||
}
|
||||
|
||||
for (i = 0; i < ctx->fragtex.num_samplers; i++) {
|
||||
if (tex_cmp(&ctx->fragtex, i, tex, samp_id))
|
||||
return const_idx;
|
||||
const_idx++;
|
||||
}
|
||||
|
||||
return const_idx;
|
||||
if (tex == &ctx->fragtex)
|
||||
return samp_id;
|
||||
return samp_id + ctx->fragtex.num_samplers;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -65,7 +65,7 @@ fd_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
|
|||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
ctx->vtx = hwcso;
|
||||
ctx->dirty |= FD_DIRTY_VTX;
|
||||
ctx->dirty |= FD_DIRTY_VTXSTATE;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -193,9 +193,10 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
|||
/* and any buffers used, need to be resolved: */
|
||||
ctx->resolve |= buffers;
|
||||
|
||||
fd_state_emit(pctx, ctx->dirty);
|
||||
if (ctx->dirty & FD_DIRTY_VTXBUF)
|
||||
emit_vertexbufs(ctx);
|
||||
|
||||
emit_vertexbufs(ctx, info->count);
|
||||
fd_state_emit(pctx, ctx->dirty);
|
||||
|
||||
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
|
||||
OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
|
||||
|
|
Loading…
Reference in New Issue