freedreno/a6xx: use program cache
Use the in-memory cache to construct shader program state and re-use it on subsequent draws, to lower driver overhead.

Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
parent
1d7fbe2cd1
commit
b4e94af37d
|
@ -56,6 +56,8 @@ fd6_context_destroy(struct pipe_context *pctx)
|
||||||
|
|
||||||
fd_context_cleanup_common_vbos(&fd6_ctx->base);
|
fd_context_cleanup_common_vbos(&fd6_ctx->base);
|
||||||
|
|
||||||
|
ir3_cache_destroy(fd6_ctx->shader_cache);
|
||||||
|
|
||||||
fd6_texture_fini(pctx);
|
fd6_texture_fini(pctx);
|
||||||
|
|
||||||
free(fd6_ctx);
|
free(fd6_ctx);
|
||||||
|
|
|
@ -38,13 +38,6 @@
|
||||||
|
|
||||||
#include "a6xx.xml.h"
|
#include "a6xx.xml.h"
|
||||||
|
|
||||||
struct fd6_streamout_state {
|
|
||||||
uint32_t ncomp[PIPE_MAX_SO_BUFFERS];
|
|
||||||
uint32_t prog[256/2];
|
|
||||||
uint32_t prog_count;
|
|
||||||
uint32_t vpc_so_buf_cntl;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct fd6_context {
|
struct fd6_context {
|
||||||
struct fd_context base;
|
struct fd_context base;
|
||||||
|
|
||||||
|
@ -101,10 +94,13 @@ struct fd6_context {
|
||||||
/* number of active samples-passed queries: */
|
/* number of active samples-passed queries: */
|
||||||
int samples_passed_queries;
|
int samples_passed_queries;
|
||||||
|
|
||||||
/* cached state about current emitted shader program (3d): */
|
/* maps per-shader-stage state plus variant key to hw
|
||||||
/*{*/
|
* program stateobj:
|
||||||
struct fd6_streamout_state tf;
|
*/
|
||||||
/*}*/
|
struct ir3_cache *shader_cache;
|
||||||
|
|
||||||
|
/* cached stateobjs to avoid hashtable lookup when not dirty: */
|
||||||
|
const struct fd6_program_state *prog;
|
||||||
|
|
||||||
uint16_t tex_seqno;
|
uint16_t tex_seqno;
|
||||||
struct hash_table *tex_cache;
|
struct hash_table *tex_cache;
|
||||||
|
|
|
@ -199,17 +199,17 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
|
||||||
{
|
{
|
||||||
struct fd6_context *fd6_ctx = fd6_context(ctx);
|
struct fd6_context *fd6_ctx = fd6_context(ctx);
|
||||||
struct fd6_emit emit = {
|
struct fd6_emit emit = {
|
||||||
.debug = &ctx->debug,
|
.ctx = ctx,
|
||||||
.vtx = &ctx->vtx,
|
.vtx = &ctx->vtx,
|
||||||
.prog = &ctx->prog,
|
|
||||||
.info = info,
|
.info = info,
|
||||||
|
.key = {
|
||||||
|
.vs = ctx->prog.vp,
|
||||||
|
.fs = ctx->prog.fp,
|
||||||
.key = {
|
.key = {
|
||||||
.color_two_side = ctx->rasterizer->light_twoside,
|
.color_two_side = ctx->rasterizer->light_twoside,
|
||||||
.vclamp_color = ctx->rasterizer->clamp_vertex_color,
|
.vclamp_color = ctx->rasterizer->clamp_vertex_color,
|
||||||
.fclamp_color = ctx->rasterizer->clamp_fragment_color,
|
.fclamp_color = ctx->rasterizer->clamp_fragment_color,
|
||||||
.rasterflat = ctx->rasterizer->flatshade,
|
.rasterflat = ctx->rasterizer->flatshade,
|
||||||
.half_precision = ctx->in_blit &&
|
|
||||||
fd_half_precision(&ctx->batch->framebuffer),
|
|
||||||
.ucp_enables = ctx->rasterizer->clip_plane_enable,
|
.ucp_enables = ctx->rasterizer->clip_plane_enable,
|
||||||
.has_per_samp = (fd6_ctx->fsaturate || fd6_ctx->vsaturate ||
|
.has_per_samp = (fd6_ctx->fsaturate || fd6_ctx->vsaturate ||
|
||||||
fd6_ctx->fastc_srgb || fd6_ctx->vastc_srgb),
|
fd6_ctx->fastc_srgb || fd6_ctx->vastc_srgb),
|
||||||
|
@ -223,17 +223,28 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
|
||||||
.fastc_srgb = fd6_ctx->fastc_srgb,
|
.fastc_srgb = fd6_ctx->fastc_srgb,
|
||||||
.vsamples = ctx->tex[PIPE_SHADER_VERTEX].samples,
|
.vsamples = ctx->tex[PIPE_SHADER_VERTEX].samples,
|
||||||
.fsamples = ctx->tex[PIPE_SHADER_FRAGMENT].samples,
|
.fsamples = ctx->tex[PIPE_SHADER_FRAGMENT].samples,
|
||||||
|
}
|
||||||
},
|
},
|
||||||
.rasterflat = ctx->rasterizer->flatshade,
|
.rasterflat = ctx->rasterizer->flatshade,
|
||||||
.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
|
.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
|
||||||
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
|
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
|
||||||
};
|
};
|
||||||
|
|
||||||
fixup_shader_state(ctx, &emit.key);
|
fixup_shader_state(ctx, &emit.key.key);
|
||||||
|
|
||||||
unsigned dirty = ctx->dirty;
|
unsigned dirty = ctx->dirty;
|
||||||
const struct ir3_shader_variant *vp = fd6_emit_get_vp(&emit);
|
|
||||||
const struct ir3_shader_variant *fp = fd6_emit_get_fp(&emit);
|
if (!(dirty & FD_DIRTY_PROG)) {
|
||||||
|
emit.prog = fd6_ctx->prog;
|
||||||
|
} else {
|
||||||
|
fd6_ctx->prog = fd6_emit_get_prog(&emit);
|
||||||
|
}
|
||||||
|
|
||||||
|
emit.vs = fd6_emit_get_prog(&emit)->vs;
|
||||||
|
emit.fs = fd6_emit_get_prog(&emit)->fs;
|
||||||
|
|
||||||
|
const struct ir3_shader_variant *vp = emit.vs;
|
||||||
|
const struct ir3_shader_variant *fp = emit.fs;
|
||||||
|
|
||||||
/* do regular pass first, since that is more likely to fail compiling: */
|
/* do regular pass first, since that is more likely to fail compiling: */
|
||||||
|
|
||||||
|
@ -256,8 +267,8 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
|
||||||
/* and now binning pass: */
|
/* and now binning pass: */
|
||||||
emit.binning_pass = true;
|
emit.binning_pass = true;
|
||||||
emit.dirty = dirty & ~(FD_DIRTY_BLEND);
|
emit.dirty = dirty & ~(FD_DIRTY_BLEND);
|
||||||
emit.vp = NULL; /* we changed key so need to refetch vp */
|
emit.vs = fd6_emit_get_prog(&emit)->bs;
|
||||||
emit.fp = NULL;
|
|
||||||
draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
|
draw_impl(ctx, ctx->batch->binning, &emit, index_offset);
|
||||||
|
|
||||||
if (emit.streamout_mask) {
|
if (emit.streamout_mask) {
|
||||||
|
|
|
@ -533,7 +533,7 @@ fd6_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd6_emit *emit)
|
||||||
{
|
{
|
||||||
int32_t i, j;
|
int32_t i, j;
|
||||||
const struct fd_vertex_state *vtx = emit->vtx;
|
const struct fd_vertex_state *vtx = emit->vtx;
|
||||||
const struct ir3_shader_variant *vp = fd6_emit_get_vp(emit);
|
const struct ir3_shader_variant *vp = emit->vs;
|
||||||
|
|
||||||
for (i = 0, j = 0; i <= vp->inputs_count; i++) {
|
for (i = 0, j = 0; i <= vp->inputs_count; i++) {
|
||||||
if (vp->inputs[i].sysval)
|
if (vp->inputs[i].sysval)
|
||||||
|
@ -588,8 +588,9 @@ fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||||
struct fd6_emit *emit)
|
struct fd6_emit *emit)
|
||||||
{
|
{
|
||||||
struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
|
struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
|
||||||
const struct ir3_shader_variant *vp = fd6_emit_get_vp(emit);
|
const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
|
||||||
const struct ir3_shader_variant *fp = fd6_emit_get_fp(emit);
|
const struct ir3_shader_variant *vp = emit->vs;
|
||||||
|
const struct ir3_shader_variant *fp = emit->fs;
|
||||||
const enum fd_dirty_3d_state dirty = emit->dirty;
|
const enum fd_dirty_3d_state dirty = emit->dirty;
|
||||||
bool needs_border = false;
|
bool needs_border = false;
|
||||||
|
|
||||||
|
@ -719,8 +720,19 @@ fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||||
OUT_RING(ring, A6XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
|
OUT_RING(ring, A6XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dirty & FD_DIRTY_PROG)
|
if (dirty & FD_DIRTY_PROG) {
|
||||||
fd6_program_emit(ctx, ring, emit);
|
struct fd_ringbuffer *stateobj = emit->binning_pass ?
|
||||||
|
prog->binning_stateobj : prog->stateobj;
|
||||||
|
|
||||||
|
fd6_emit_add_group(emit, stateobj, FD6_GROUP_PROG, 0x7);
|
||||||
|
|
||||||
|
/* emit remaining non-stateobj program state, ie. what depends
|
||||||
|
* on other emit state, so cannot be pre-baked. This could
|
||||||
|
* be moved to a separate stateobj which is dynamically
|
||||||
|
* created.
|
||||||
|
*/
|
||||||
|
fd6_program_emit(ring, emit);
|
||||||
|
}
|
||||||
|
|
||||||
if (dirty & FD_DIRTY_RASTERIZER) {
|
if (dirty & FD_DIRTY_RASTERIZER) {
|
||||||
struct fd6_rasterizer_stateobj *rasterizer =
|
struct fd6_rasterizer_stateobj *rasterizer =
|
||||||
|
@ -854,7 +866,7 @@ fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (emit->streamout_mask) {
|
if (emit->streamout_mask) {
|
||||||
struct fd6_streamout_state *tf = &fd6_context(ctx)->tf;
|
const struct fd6_streamout_state *tf = &prog->tf;
|
||||||
|
|
||||||
OUT_PKT7(ring, CP_CONTEXT_REG_BUNCH, 12 + (2 * tf->prog_count));
|
OUT_PKT7(ring, CP_CONTEXT_REG_BUNCH, 12 + (2 * tf->prog_count));
|
||||||
OUT_RING(ring, REG_A6XX_VPC_SO_BUF_CNTL);
|
OUT_RING(ring, REG_A6XX_VPC_SO_BUF_CNTL);
|
||||||
|
|
|
@ -43,6 +43,7 @@ struct fd_ringbuffer;
|
||||||
* need to be emit'd.
|
* need to be emit'd.
|
||||||
*/
|
*/
|
||||||
enum fd6_state_id {
|
enum fd6_state_id {
|
||||||
|
FD6_GROUP_PROG,
|
||||||
FD6_GROUP_VS_CONST,
|
FD6_GROUP_VS_CONST,
|
||||||
FD6_GROUP_FS_CONST,
|
FD6_GROUP_FS_CONST,
|
||||||
FD6_GROUP_VS_TEX,
|
FD6_GROUP_VS_TEX,
|
||||||
|
@ -57,12 +58,11 @@ struct fd6_state_group {
|
||||||
|
|
||||||
/* grouped together emit-state for prog/vertex/state emit: */
|
/* grouped together emit-state for prog/vertex/state emit: */
|
||||||
struct fd6_emit {
|
struct fd6_emit {
|
||||||
struct pipe_debug_callback *debug;
|
struct fd_context *ctx;
|
||||||
const struct fd_vertex_state *vtx;
|
const struct fd_vertex_state *vtx;
|
||||||
const struct fd_program_stateobj *prog;
|
|
||||||
const struct pipe_draw_info *info;
|
const struct pipe_draw_info *info;
|
||||||
bool binning_pass;
|
bool binning_pass;
|
||||||
struct ir3_shader_key key;
|
struct ir3_cache_key key;
|
||||||
enum fd_dirty_3d_state dirty;
|
enum fd_dirty_3d_state dirty;
|
||||||
|
|
||||||
uint32_t sprite_coord_enable; /* bitmask */
|
uint32_t sprite_coord_enable; /* bitmask */
|
||||||
|
@ -76,9 +76,11 @@ struct fd6_emit {
|
||||||
*/
|
*/
|
||||||
bool no_lrz_write;
|
bool no_lrz_write;
|
||||||
|
|
||||||
/* cached to avoid repeated lookups of same variants: */
|
/* cached to avoid repeated lookups: */
|
||||||
const struct ir3_shader_variant *vp, *fp;
|
const struct fd6_program_state *prog;
|
||||||
/* TODO: other shader stages.. */
|
|
||||||
|
struct ir3_shader_variant *vs;
|
||||||
|
struct ir3_shader_variant *fs;
|
||||||
|
|
||||||
unsigned streamout_mask;
|
unsigned streamout_mask;
|
||||||
|
|
||||||
|
@ -86,32 +88,16 @@ struct fd6_emit {
|
||||||
unsigned num_groups;
|
unsigned num_groups;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline const struct ir3_shader_variant *
|
static inline const struct fd6_program_state *
|
||||||
fd6_emit_get_vp(struct fd6_emit *emit)
|
fd6_emit_get_prog(struct fd6_emit *emit)
|
||||||
{
|
{
|
||||||
if (!emit->vp) {
|
if (!emit->prog) {
|
||||||
struct ir3_shader *shader = emit->prog->vp;
|
struct fd6_context *fd6_ctx = fd6_context(emit->ctx);
|
||||||
emit->vp = ir3_shader_variant(shader, emit->key,
|
struct ir3_program_state *s =
|
||||||
emit->binning_pass, emit->debug);
|
ir3_cache_lookup(fd6_ctx->shader_cache, &emit->key, &emit->ctx->debug);
|
||||||
|
emit->prog = fd6_program_state(s);
|
||||||
}
|
}
|
||||||
return emit->vp;
|
return emit->prog;
|
||||||
}
|
|
||||||
|
|
||||||
static inline const struct ir3_shader_variant *
|
|
||||||
fd6_emit_get_fp(struct fd6_emit *emit)
|
|
||||||
{
|
|
||||||
if (!emit->fp) {
|
|
||||||
if (emit->binning_pass) {
|
|
||||||
/* use dummy stateobj to simplify binning vs non-binning: */
|
|
||||||
static const struct ir3_shader_variant binning_fp = {};
|
|
||||||
emit->fp = &binning_fp;
|
|
||||||
} else {
|
|
||||||
struct ir3_shader *shader = emit->prog->fp;
|
|
||||||
emit->fp = ir3_shader_variant(shader, emit->key,
|
|
||||||
false, emit->debug);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return emit->fp;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
|
|
|
@ -59,6 +59,8 @@ static void
|
||||||
fd6_fp_state_delete(struct pipe_context *pctx, void *hwcso)
|
fd6_fp_state_delete(struct pipe_context *pctx, void *hwcso)
|
||||||
{
|
{
|
||||||
struct ir3_shader *so = hwcso;
|
struct ir3_shader *so = hwcso;
|
||||||
|
struct fd_context *ctx = fd_context(pctx);
|
||||||
|
ir3_cache_invalidate(fd6_context(ctx)->shader_cache, hwcso);
|
||||||
ir3_shader_destroy(so);
|
ir3_shader_destroy(so);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -73,6 +75,8 @@ static void
|
||||||
fd6_vp_state_delete(struct pipe_context *pctx, void *hwcso)
|
fd6_vp_state_delete(struct pipe_context *pctx, void *hwcso)
|
||||||
{
|
{
|
||||||
struct ir3_shader *so = hwcso;
|
struct ir3_shader *so = hwcso;
|
||||||
|
struct fd_context *ctx = fd_context(pctx);
|
||||||
|
ir3_cache_invalidate(fd6_context(ctx)->shader_cache, hwcso);
|
||||||
ir3_shader_destroy(so);
|
ir3_shader_destroy(so);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -178,11 +182,11 @@ link_stream_out(struct ir3_shader_linkage *l, const struct ir3_shader_variant *v
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
setup_stream_out(struct fd_context *ctx, const struct ir3_shader_variant *v,
|
setup_stream_out(struct fd6_program_state *state, const struct ir3_shader_variant *v,
|
||||||
struct ir3_shader_linkage *l)
|
struct ir3_shader_linkage *l)
|
||||||
{
|
{
|
||||||
const struct pipe_stream_output_info *strmout = &v->shader->stream_output;
|
const struct pipe_stream_output_info *strmout = &v->shader->stream_output;
|
||||||
struct fd6_streamout_state *tf = &fd6_context(ctx)->tf;
|
struct fd6_streamout_state *tf = &state->tf;
|
||||||
|
|
||||||
memset(tf, 0, sizeof(*tf));
|
memset(tf, 0, sizeof(*tf));
|
||||||
|
|
||||||
|
@ -251,12 +255,19 @@ enum {
|
||||||
};
|
};
|
||||||
|
|
||||||
static void
|
static void
|
||||||
setup_stages(struct fd6_emit *emit, struct stage *s)
|
setup_stages(struct fd6_program_state *state, struct stage *s, bool binning_pass)
|
||||||
{
|
{
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
s[VS].v = fd6_emit_get_vp(emit);
|
if (binning_pass) {
|
||||||
s[FS].v = fd6_emit_get_fp(emit);
|
static const struct ir3_shader_variant dummy_fs = {0};
|
||||||
|
|
||||||
|
s[VS].v = state->bs;
|
||||||
|
s[FS].v = &dummy_fs;
|
||||||
|
} else {
|
||||||
|
s[VS].v = state->vs;
|
||||||
|
s[FS].v = state->fs;
|
||||||
|
}
|
||||||
|
|
||||||
s[HS].v = s[DS].v = s[GS].v = NULL; /* for now */
|
s[HS].v = s[DS].v = s[GS].v = NULL; /* for now */
|
||||||
|
|
||||||
|
@ -287,9 +298,9 @@ setup_stages(struct fd6_emit *emit, struct stage *s)
|
||||||
s[HS].instroff = s[DS].instroff = s[GS].instroff = s[FS].instroff;
|
s[HS].instroff = s[DS].instroff = s[GS].instroff = s[FS].instroff;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
static void
|
||||||
fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
setup_stateobj(struct fd_ringbuffer *ring,
|
||||||
struct fd6_emit *emit)
|
struct fd6_program_state *state, bool binning_pass)
|
||||||
{
|
{
|
||||||
struct stage s[MAX_STAGES];
|
struct stage s[MAX_STAGES];
|
||||||
uint32_t pos_regid, psize_regid, color_regid[8];
|
uint32_t pos_regid, psize_regid, color_regid[8];
|
||||||
|
@ -299,7 +310,7 @@ fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||||
uint8_t psize_loc = ~0;
|
uint8_t psize_loc = ~0;
|
||||||
int i, j;
|
int i, j;
|
||||||
|
|
||||||
setup_stages(emit, s);
|
setup_stages(state, s, binning_pass);
|
||||||
|
|
||||||
fssz = FOUR_QUADS;
|
fssz = FOUR_QUADS;
|
||||||
|
|
||||||
|
@ -392,8 +403,7 @@ fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||||
struct ir3_shader_linkage l = {0};
|
struct ir3_shader_linkage l = {0};
|
||||||
ir3_link_shaders(&l, s[VS].v, s[FS].v);
|
ir3_link_shaders(&l, s[VS].v, s[FS].v);
|
||||||
|
|
||||||
if ((s[VS].v->shader->stream_output.num_outputs > 0) &&
|
if ((s[VS].v->shader->stream_output.num_outputs > 0) && !binning_pass)
|
||||||
!emit->binning_pass)
|
|
||||||
link_stream_out(&l, s[VS].v);
|
link_stream_out(&l, s[VS].v);
|
||||||
|
|
||||||
BITSET_DECLARE(varbs, 128) = {0};
|
BITSET_DECLARE(varbs, 128) = {0};
|
||||||
|
@ -418,9 +428,8 @@ fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||||
ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
|
ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((s[VS].v->shader->stream_output.num_outputs > 0) &&
|
if ((s[VS].v->shader->stream_output.num_outputs > 0) && !binning_pass) {
|
||||||
!emit->binning_pass) {
|
setup_stream_out(state, s[VS].v, &l);
|
||||||
setup_stream_out(ctx, s[VS].v, &l);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
|
for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
|
||||||
|
@ -478,7 +487,7 @@ fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||||
OUT_RING(ring, A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC(l.max_loc) |
|
OUT_RING(ring, A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC(l.max_loc) |
|
||||||
COND(psize_regid != regid(63,0), 0x100));
|
COND(psize_regid != regid(63,0), 0x100));
|
||||||
|
|
||||||
if (emit->binning_pass) {
|
if (binning_pass) {
|
||||||
OUT_PKT4(ring, REG_A6XX_SP_FS_OBJ_START_LO, 2);
|
OUT_PKT4(ring, REG_A6XX_SP_FS_OBJ_START_LO, 2);
|
||||||
OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_LO */
|
OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_LO */
|
||||||
OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_HI */
|
OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_HI */
|
||||||
|
@ -549,8 +558,10 @@ fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||||
|
|
||||||
OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_REG(0), 8);
|
OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_REG(0), 8);
|
||||||
for (i = 0; i < 8; i++) {
|
for (i = 0; i < 8; i++) {
|
||||||
|
// TODO we could have a mix of half and full precision outputs,
|
||||||
|
// we really need to figure out half-precision from IR3_REG_HALF
|
||||||
OUT_RING(ring, A6XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) |
|
OUT_RING(ring, A6XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) |
|
||||||
COND(emit->key.half_precision,
|
COND(false,
|
||||||
A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION));
|
A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -559,27 +570,7 @@ fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||||
A6XX_VPC_PACK_PSIZELOC(psize_loc) |
|
A6XX_VPC_PACK_PSIZELOC(psize_loc) |
|
||||||
A6XX_VPC_PACK_STRIDE_IN_VPC(l.max_loc));
|
A6XX_VPC_PACK_STRIDE_IN_VPC(l.max_loc));
|
||||||
|
|
||||||
if (!emit->binning_pass) {
|
if (!binning_pass) {
|
||||||
uint32_t vinterp[8], vpsrepl[8];
|
|
||||||
|
|
||||||
memset(vinterp, 0, sizeof(vinterp));
|
|
||||||
memset(vpsrepl, 0, sizeof(vpsrepl));
|
|
||||||
|
|
||||||
/* looks like we need to do int varyings in the frag
|
|
||||||
* shader on a5xx (no flatshad reg? or a420.0 bug?):
|
|
||||||
*
|
|
||||||
* (sy)(ss)nop
|
|
||||||
* (sy)ldlv.u32 r0.x,l[r0.x], 1
|
|
||||||
* ldlv.u32 r0.y,l[r0.x+1], 1
|
|
||||||
* (ss)bary.f (ei)r63.x, 0, r0.x
|
|
||||||
* (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x
|
|
||||||
* (rpt5)nop
|
|
||||||
* sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0
|
|
||||||
*
|
|
||||||
* Possibly on later a5xx variants we'll be able to use
|
|
||||||
* something like the code below instead of workaround
|
|
||||||
* in the shader:
|
|
||||||
*/
|
|
||||||
/* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
|
/* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
|
||||||
for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
|
for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
|
||||||
/* NOTE: varyings are packed, so if compmask is 0xb
|
/* NOTE: varyings are packed, so if compmask is 0xb
|
||||||
|
@ -590,20 +581,83 @@ fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||||
|
|
||||||
uint32_t inloc = s[FS].v->inputs[j].inloc;
|
uint32_t inloc = s[FS].v->inputs[j].inloc;
|
||||||
|
|
||||||
if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) ||
|
if (s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) {
|
||||||
(s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
|
uint32_t loc = inloc;
|
||||||
|
|
||||||
|
for (i = 0; i < 4; i++) {
|
||||||
|
if (compmask & (1 << i)) {
|
||||||
|
state->vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
|
||||||
|
loc++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!binning_pass)
|
||||||
|
if (s[FS].instrlen)
|
||||||
|
fd6_emit_shader(ring, s[FS].v);
|
||||||
|
|
||||||
|
OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_1, 6);
|
||||||
|
OUT_RING(ring, A6XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
|
||||||
|
A6XX_VFD_CONTROL_1_REGID4INST(instance_regid) |
|
||||||
|
0xfcfc0000);
|
||||||
|
OUT_RING(ring, 0x0000fcfc); /* VFD_CONTROL_2 */
|
||||||
|
OUT_RING(ring, 0xfcfcfcfc); /* VFD_CONTROL_3 */
|
||||||
|
OUT_RING(ring, 0x000000fc); /* VFD_CONTROL_4 */
|
||||||
|
OUT_RING(ring, 0x0000fcfc); /* VFD_CONTROL_5 */
|
||||||
|
OUT_RING(ring, 0x00000000); /* VFD_CONTROL_6 */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* emits the program state which is not part of the stateobj because of
|
||||||
|
* dependency on other gl state (rasterflat or sprite-coord-replacement)
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
fd6_program_emit(struct fd_ringbuffer *ring, struct fd6_emit *emit)
|
||||||
|
{
|
||||||
|
const struct fd6_program_state *state = fd6_emit_get_prog(emit);
|
||||||
|
|
||||||
|
if (!unlikely(emit->rasterflat || emit->sprite_coord_enable)) {
|
||||||
|
/* fastpath: */
|
||||||
|
OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
|
||||||
|
for (int i = 0; i < 8; i++)
|
||||||
|
OUT_RING(ring, state->vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */
|
||||||
|
|
||||||
|
OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
|
||||||
|
for (int i = 0; i < 8; i++)
|
||||||
|
OUT_RING(ring, 0x00000000); /* VPC_VARYING_PS_REPL[i] */
|
||||||
|
} else {
|
||||||
|
/* slow-path: */
|
||||||
|
struct ir3_shader_variant *fs = state->fs;
|
||||||
|
uint32_t vinterp[8], vpsrepl[8];
|
||||||
|
|
||||||
|
memset(vinterp, 0, sizeof(vinterp));
|
||||||
|
memset(vpsrepl, 0, sizeof(vpsrepl));
|
||||||
|
|
||||||
|
for (int i = 0; i < state->fs_inputs_count; i++) {
|
||||||
|
int j = state->fs_inputs[i];
|
||||||
|
|
||||||
|
/* NOTE: varyings are packed, so if compmask is 0xb
|
||||||
|
* then first, third, and fourth component occupy
|
||||||
|
* three consecutive varying slots:
|
||||||
|
*/
|
||||||
|
unsigned compmask = fs->inputs[j].compmask;
|
||||||
|
|
||||||
|
uint32_t inloc = fs->inputs[j].inloc;
|
||||||
|
|
||||||
|
if ((fs->inputs[j].interpolate == INTERP_MODE_FLAT) ||
|
||||||
|
(fs->inputs[j].rasterflat && emit->rasterflat)) {
|
||||||
uint32_t loc = inloc;
|
uint32_t loc = inloc;
|
||||||
|
|
||||||
for (i = 0; i < 4; i++) {
|
for (i = 0; i < 4; i++) {
|
||||||
if (compmask & (1 << i)) {
|
if (compmask & (1 << i)) {
|
||||||
vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
|
vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
|
||||||
//flatshade[loc / 32] |= 1 << (loc % 32);
|
|
||||||
loc++;
|
loc++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gl_varying_slot slot = s[FS].v->inputs[j].slot;
|
gl_varying_slot slot = fs->inputs[j].slot;
|
||||||
|
|
||||||
/* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
|
/* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
|
||||||
if (slot >= VARYING_SLOT_VAR0) {
|
if (slot >= VARYING_SLOT_VAR0) {
|
||||||
|
@ -642,32 +696,57 @@ fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||||
}
|
}
|
||||||
|
|
||||||
OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
|
OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
|
||||||
for (i = 0; i < 8; i++)
|
for (int i = 0; i < 8; i++)
|
||||||
OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */
|
OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */
|
||||||
|
|
||||||
OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
|
OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
|
||||||
for (i = 0; i < 8; i++)
|
for (int i = 0; i < 8; i++)
|
||||||
OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
|
OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!emit->binning_pass)
|
|
||||||
if (s[FS].instrlen)
|
|
||||||
fd6_emit_shader(ring, s[FS].v);
|
|
||||||
|
|
||||||
OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_1, 6);
|
|
||||||
OUT_RING(ring, A6XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
|
|
||||||
A6XX_VFD_CONTROL_1_REGID4INST(instance_regid) |
|
|
||||||
0xfcfc0000);
|
|
||||||
OUT_RING(ring, 0x0000fcfc); /* VFD_CONTROL_2 */
|
|
||||||
OUT_RING(ring, 0xfcfcfcfc); /* VFD_CONTROL_3 */
|
|
||||||
OUT_RING(ring, 0x000000fc); /* VFD_CONTROL_4 */
|
|
||||||
OUT_RING(ring, 0x0000fcfc); /* VFD_CONTROL_5 */
|
|
||||||
OUT_RING(ring, 0x00000000); /* VFD_CONTROL_6 */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct ir3_program_state *
|
||||||
|
fd6_program_create(void *data, struct ir3_shader_variant *bs,
|
||||||
|
struct ir3_shader_variant *vs,
|
||||||
|
struct ir3_shader_variant *fs,
|
||||||
|
const struct ir3_shader_key *key)
|
||||||
|
{
|
||||||
|
struct fd_context *ctx = data;
|
||||||
|
struct fd6_program_state *state = CALLOC_STRUCT(fd6_program_state);
|
||||||
|
|
||||||
|
state->bs = bs;
|
||||||
|
state->vs = vs;
|
||||||
|
state->fs = fs;
|
||||||
|
state->binning_stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
|
||||||
|
state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
|
||||||
|
|
||||||
|
setup_stateobj(state->binning_stateobj, state, true);
|
||||||
|
setup_stateobj(state->stateobj, state, false);
|
||||||
|
|
||||||
|
return &state->base;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
fd6_program_destroy(void *data, struct ir3_program_state *state)
|
||||||
|
{
|
||||||
|
struct fd6_program_state *so = fd6_program_state(state);
|
||||||
|
fd_ringbuffer_del(so->stateobj);
|
||||||
|
fd_ringbuffer_del(so->binning_stateobj);
|
||||||
|
free(so);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct ir3_cache_funcs cache_funcs = {
|
||||||
|
.create_state = fd6_program_create,
|
||||||
|
.destroy_state = fd6_program_destroy,
|
||||||
|
};
|
||||||
|
|
||||||
void
|
void
|
||||||
fd6_prog_init(struct pipe_context *pctx)
|
fd6_prog_init(struct pipe_context *pctx)
|
||||||
{
|
{
|
||||||
|
struct fd_context *ctx = fd_context(pctx);
|
||||||
|
|
||||||
|
fd6_context(ctx)->shader_cache = ir3_cache_create(&cache_funcs, ctx);
|
||||||
|
|
||||||
pctx->create_fs_state = fd6_fp_state_create;
|
pctx->create_fs_state = fd6_fp_state_create;
|
||||||
pctx->delete_fs_state = fd6_fp_state_delete;
|
pctx->delete_fs_state = fd6_fp_state_delete;
|
||||||
|
|
||||||
|
|
|
@ -31,13 +31,44 @@
|
||||||
#include "pipe/p_context.h"
|
#include "pipe/p_context.h"
|
||||||
#include "freedreno_context.h"
|
#include "freedreno_context.h"
|
||||||
#include "ir3_shader.h"
|
#include "ir3_shader.h"
|
||||||
|
#include "ir3_cache.h"
|
||||||
|
|
||||||
|
struct fd6_streamout_state {
|
||||||
|
uint32_t ncomp[PIPE_MAX_SO_BUFFERS];
|
||||||
|
uint32_t prog[256/2];
|
||||||
|
uint32_t prog_count;
|
||||||
|
uint32_t vpc_so_buf_cntl;
|
||||||
|
};
|
||||||
|
|
||||||
struct fd6_emit;
|
struct fd6_emit;
|
||||||
|
|
||||||
|
struct fd6_program_state {
|
||||||
|
struct ir3_program_state base;
|
||||||
|
struct ir3_shader_variant *bs; /* binning pass vs */
|
||||||
|
struct ir3_shader_variant *vs;
|
||||||
|
struct ir3_shader_variant *fs;
|
||||||
|
struct fd_ringbuffer *binning_stateobj;
|
||||||
|
struct fd_ringbuffer *stateobj;
|
||||||
|
|
||||||
|
/* cached state about current emitted shader program (3d): */
|
||||||
|
struct fd6_streamout_state tf;
|
||||||
|
|
||||||
|
/* index and # of varyings: */
|
||||||
|
uint8_t fs_inputs[16];
|
||||||
|
uint8_t fs_inputs_count;
|
||||||
|
|
||||||
|
uint32_t vinterp[8];
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline struct fd6_program_state *
|
||||||
|
fd6_program_state(struct ir3_program_state *state)
|
||||||
|
{
|
||||||
|
return (struct fd6_program_state *)state;
|
||||||
|
}
|
||||||
|
|
||||||
void fd6_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so);
|
void fd6_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so);
|
||||||
|
|
||||||
void fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
void fd6_program_emit(struct fd_ringbuffer *ring, struct fd6_emit *emit);
|
||||||
struct fd6_emit *emit);
|
|
||||||
|
|
||||||
void fd6_prog_init(struct pipe_context *pctx);
|
void fd6_prog_init(struct pipe_context *pctx);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue