freedreno/a6xx: Convert to dirty_groups

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4106
Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9581>
This commit is contained in:
Rob Clark 2021-03-12 15:03:52 -08:00
parent 32bc809a08
commit 9401d5bf1a
6 changed files with 361 additions and 247 deletions

View File

@ -158,9 +158,8 @@ emit_stage_tess_consts(struct fd_ringbuffer *ring, struct ir3_shader_variant *v,
fd6_emit_const_user(ring, v, regid * 4, num_params, params);
}
static void
emit_tess_consts(struct fd6_emit *emit)
assert_dt
struct fd_ringbuffer *
fd6_build_tess_consts(struct fd6_emit *emit)
{
struct fd_context *ctx = emit->ctx;
@ -227,7 +226,7 @@ emit_tess_consts(struct fd6_emit *emit)
emit_stage_tess_consts(constobj, emit->gs, gs_params, ARRAY_SIZE(gs_params));
}
fd6_emit_take_group(emit, constobj, FD6_GROUP_PRIMITIVE_PARAMS, ENABLE_ALL);
return constobj;
}
static void
@ -312,9 +311,8 @@ user_consts_cmdstream_size(struct ir3_shader_variant *v)
return ubo_state->cmdstream_size;
}
static void
emit_user_consts(struct fd6_emit *emit)
assert_dt
struct fd_ringbuffer *
fd6_build_user_consts(struct fd6_emit *emit)
{
static const enum pipe_shader_type types[] = {
PIPE_SHADER_VERTEX, PIPE_SHADER_TESS_CTRL, PIPE_SHADER_TESS_EVAL,
@ -342,33 +340,26 @@ emit_user_consts(struct fd6_emit *emit)
fd6_emit_ubos(ctx, variants[i], constobj, &ctx->constbuf[types[i]]);
}
fd6_emit_take_group(emit, constobj, FD6_GROUP_CONST, ENABLE_ALL);
return constobj;
}
void
fd6_emit_consts(struct fd6_emit *emit)
struct fd_ringbuffer *
fd6_build_vs_driver_params(struct fd6_emit *emit)
{
struct fd_context *ctx = emit->ctx;
struct fd6_context *fd6_ctx = fd6_context(ctx);
if (emit->dirty & (FD_DIRTY_CONST | FD_DIRTY_PROG))
emit_user_consts(emit);
if (emit->key.key.has_gs || emit->key.key.tessellation)
emit_tess_consts(emit);
/* if driver-params are needed, emit each time: */
const struct ir3_shader_variant *vs = emit->vs;
if (vs->need_driver_params) {
struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer(
ctx->batch->submit, IR3_DP_VS_COUNT * 4, FD_RINGBUFFER_STREAMING);
ir3_emit_vs_driver_params(vs, dpconstobj, ctx, emit->info, emit->indirect, emit->draw);
fd6_emit_take_group(emit, dpconstobj, FD6_GROUP_VS_DRIVER_PARAMS, ENABLE_ALL);
fd6_ctx->has_dp_state = true;
} else if (fd6_ctx->has_dp_state) {
fd6_emit_take_group(emit, NULL, FD6_GROUP_VS_DRIVER_PARAMS, ENABLE_ALL);
fd6_ctx->has_dp_state = false;
return dpconstobj;
}
fd6_ctx->has_dp_state = false;
return NULL;
}
void

View File

@ -28,7 +28,10 @@
#include "fd6_emit.h"
void fd6_emit_consts(struct fd6_emit *emit) assert_dt;
struct fd_ringbuffer * fd6_build_tess_consts(struct fd6_emit *emit) assert_dt;
struct fd_ringbuffer * fd6_build_user_consts(struct fd6_emit *emit) assert_dt;
struct fd_ringbuffer * fd6_build_vs_driver_params(struct fd6_emit *emit) assert_dt;
void fd6_emit_ibo_consts(struct fd6_emit *emit, const struct ir3_shader_variant *v,
enum pipe_shader_type stage, struct fd_ringbuffer *ring) assert_dt;
void fd6_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,

View File

@ -126,6 +126,55 @@ fd6_vertex_state_delete(struct pipe_context *pctx, void *hwcso)
FREE(hwcso);
}
/* Register the mapping from generic fd_context dirty bits (and per-shader
 * dirty bits) to a6xx state-group bits, so the core context code can
 * translate ctx->dirty into ctx->gen_dirty and the emit path only has to
 * visit the groups that actually changed.
 */
static void
setup_state_map(struct fd_context *ctx)
{
/* Group bits are accumulated in a uint32_t (emit->dirty_groups /
 * ctx->gen_dirty), so the enum must fit in 32 bits:
 */
STATIC_ASSERT(FD6_GROUP_NON_GROUP < 32);
fd_context_add_map(ctx, FD_DIRTY_VTXSTATE, BIT(FD6_GROUP_VTXSTATE));
fd_context_add_map(ctx, FD_DIRTY_VTXBUF, BIT(FD6_GROUP_VBO));
fd_context_add_map(ctx, FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER, BIT(FD6_GROUP_ZSA));
/* LRZ state depends on zsa, blend and program state, and has separate
 * draw-pass and binning-pass variants:
 */
fd_context_add_map(ctx, FD_DIRTY_ZSA | FD_DIRTY_BLEND | FD_DIRTY_PROG,
BIT(FD6_GROUP_LRZ) | BIT(FD6_GROUP_LRZ_BINNING));
fd_context_add_map(ctx, FD_DIRTY_PROG, BIT(FD6_GROUP_PROG));
fd_context_add_map(ctx, FD_DIRTY_RASTERIZER, BIT(FD6_GROUP_RASTERIZER));
fd_context_add_map(ctx, FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER_DISCARD |
FD_DIRTY_PROG | FD_DIRTY_BLEND_DUAL,
BIT(FD6_GROUP_PROG_FB_RAST));
fd_context_add_map(ctx, FD_DIRTY_BLEND | FD_DIRTY_SAMPLE_MASK, BIT(FD6_GROUP_BLEND));
fd_context_add_map(ctx, FD_DIRTY_BLEND_COLOR, BIT(FD6_GROUP_BLEND_COLOR));
fd_context_add_map(ctx, FD_DIRTY_SSBO | FD_DIRTY_IMAGE | FD_DIRTY_PROG,
BIT(FD6_GROUP_IBO));
/* A program change can switch shader variants, so every per-stage tex
 * group also depends on FD_DIRTY_PROG, in addition to the per-shader
 * FD_DIRTY_SHADER_TEX mappings below:
 */
fd_context_add_map(ctx, FD_DIRTY_PROG, BIT(FD6_GROUP_VS_TEX) | BIT(FD6_GROUP_HS_TEX) |
BIT(FD6_GROUP_DS_TEX) | BIT(FD6_GROUP_GS_TEX) | BIT(FD6_GROUP_FS_TEX));
fd_context_add_map(ctx, FD_DIRTY_PROG | FD_DIRTY_CONST, BIT(FD6_GROUP_CONST));
fd_context_add_map(ctx, FD_DIRTY_STREAMOUT, BIT(FD6_GROUP_SO));
fd_context_add_shader_map(ctx, PIPE_SHADER_VERTEX, FD_DIRTY_SHADER_TEX,
BIT(FD6_GROUP_VS_TEX));
fd_context_add_shader_map(ctx, PIPE_SHADER_TESS_CTRL, FD_DIRTY_SHADER_TEX,
BIT(FD6_GROUP_HS_TEX));
fd_context_add_shader_map(ctx, PIPE_SHADER_TESS_EVAL, FD_DIRTY_SHADER_TEX,
BIT(FD6_GROUP_DS_TEX));
fd_context_add_shader_map(ctx, PIPE_SHADER_GEOMETRY, FD_DIRTY_SHADER_TEX,
BIT(FD6_GROUP_GS_TEX));
fd_context_add_shader_map(ctx, PIPE_SHADER_FRAGMENT, FD_DIRTY_SHADER_TEX,
BIT(FD6_GROUP_FS_TEX));
/* NOTE: scissor enabled bit is part of rasterizer state, but
 * fd_rasterizer_state_bind() will mark scissor dirty if needed:
 */
fd_context_add_map(ctx, FD_DIRTY_SCISSOR, BIT(FD6_GROUP_SCISSOR));
/* Stuff still emitted in IB2 (see fd6_emit_non_ring())
 *
 * NOTE: viewport state doesn't seem to change frequently, so possibly
 * move it into FD6_GROUP_RASTERIZER?
 */
fd_context_add_map(ctx, FD_DIRTY_STENCIL_REF | FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER,
BIT(FD6_GROUP_NON_GROUP));
}
struct pipe_context *
fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
disable_thread_safety_analysis
@ -158,6 +207,8 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
fd6_emit_init(pctx);
fd6_query_context_init(pctx);
setup_state_map(&fd6_ctx->base);
pctx = fd_context_init(&fd6_ctx->base, pscreen, primtypes, priv, flags);
if (!pctx)
return NULL;

View File

@ -185,10 +185,13 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
struct shader_info *ds_info = ir3_get_shader_info(emit.key.ds);
emit.key.key.tessellation = ir3_tess_mode(ds_info->tess.primitive_mode);
ctx->gen_dirty |= BIT(FD6_GROUP_PRIMITIVE_PARAMS);
}
if (emit.key.gs)
if (emit.key.gs) {
emit.key.key.has_gs = true;
ctx->gen_dirty |= BIT(FD6_GROUP_PRIMITIVE_PARAMS);
}
if (!(emit.key.hs || emit.key.ds || emit.key.gs || indirect))
fd6_vsc_update_sizes(ctx->batch, info, draw);
@ -207,7 +210,10 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
fixup_draw_state(ctx, &emit);
emit.dirty = ctx->dirty; /* *after* fixup_shader_state() */
/* *after* fixup_shader_state(): */
emit.dirty = ctx->dirty;
emit.dirty_groups = ctx->gen_dirty;
emit.bs = fd6_emit_get_prog(&emit)->bs;
emit.vs = fd6_emit_get_prog(&emit)->vs;
emit.hs = fd6_emit_get_prog(&emit)->hs;
@ -215,6 +221,9 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
emit.gs = fd6_emit_get_prog(&emit)->gs;
emit.fs = fd6_emit_get_prog(&emit)->fs;
if (emit.vs->need_driver_params || fd6_ctx->has_dp_state)
emit.dirty_groups |= BIT(FD6_GROUP_VS_DRIVER_PARAMS);
ctx->stats.vs_regs += ir3_shader_halfregs(emit.vs);
ctx->stats.hs_regs += COND(emit.hs, ir3_shader_halfregs(emit.hs));
ctx->stats.ds_regs += COND(emit.ds, ir3_shader_halfregs(emit.ds));
@ -302,7 +311,8 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
ctx->last.restart_index = restart_index;
}
if (emit.dirty)
// TODO move fd6_emit_streamout.. I think..
if (emit.dirty_groups)
fd6_emit_state(ring, &emit);
/* for debug after a lock up, write a unique counter value

View File

@ -738,17 +738,166 @@ build_lrz(struct fd6_emit *emit, bool binning_pass)
return ring;
}
/* Build a stateobj for the current window scissor (the
 * GRAS_SC_SCREEN_SCISSOR TL/BR register pair), and fold the scissor
 * rect into the batch's max_scissor, which tracks the union of all
 * scissors used within the batch.
 */
static struct fd_ringbuffer *
build_scissor(struct fd6_emit *emit)
assert_dt
{
struct fd_context *ctx = emit->ctx;
struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
/* 3 dwords is enough for the TL/BR register pair plus pkt header: */
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(emit->ctx->batch->submit, 3*4,
FD_RINGBUFFER_STREAMING);
OUT_REG(ring,
A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0,
.x = scissor->minx,
.y = scissor->miny
),
/* the -1 suggests BR is inclusive; MAX2 keeps a zero-sized
 * scissor from underflowing:
 */
A6XX_GRAS_SC_SCREEN_SCISSOR_BR(0,
.x = MAX2(scissor->maxx, 1) - 1,
.y = MAX2(scissor->maxy, 1) - 1
)
);
/* Grow the batch's scissor union to cover this scissor: */
ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx);
ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny);
ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
return ring;
}
/* Combination of FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER_DISCARD |
 * FD_DIRTY_PROG | FD_DIRTY_BLEND_DUAL
 *
 * Builds a stateobj for the FS output controls (RB_FS_OUTPUT_CNTL0/1,
 * SP_FS_OUTPUT_CNTL1) and the render-target component-enable masks.
 */
static struct fd_ringbuffer *
build_prog_fb_rast(struct fd6_emit *emit)
assert_dt
{
struct fd_context *ctx = emit->ctx;
struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
const struct ir3_shader_variant *fs = emit->fs;
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(ctx->batch->submit,
9 * 4, FD_RINGBUFFER_STREAMING);
/* MRT count programmed into the hw: */
unsigned nr = pfb->nr_cbufs;
/* rasterizer-discard means no color output at all: */
if (ctx->rasterizer->rasterizer_discard)
nr = 0;
struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);
/* dual-src blending consumes one extra output slot: */
if (blend->use_dual_src_blend)
nr++;
OUT_PKT4(ring, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2);
OUT_RING(ring, COND(fs->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z) |
COND(fs->writes_smask && pfb->samples > 1,
A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_SAMPMASK) |
COND(fs->writes_stencilref,
A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_STENCILREF) |
COND(blend->use_dual_src_blend,
A6XX_RB_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
OUT_RING(ring, A6XX_RB_FS_OUTPUT_CNTL1_MRT(nr));
OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL1, 1);
OUT_RING(ring, A6XX_SP_FS_OUTPUT_CNTL1_MRT(nr));
/* Enable all four components for each bound color buffer (4 bits
 * per MRT):
 */
unsigned mrt_components = 0;
for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
if (!pfb->cbufs[i])
continue;
mrt_components |= 0xf << (i * 4);
}
/* dual source blending has an extra fs output in the 2nd slot */
if (blend->use_dual_src_blend)
mrt_components |= 0xf << 4;
/* ..but only enable components the program actually writes: */
mrt_components &= prog->mrt_components;
OUT_REG(ring, A6XX_SP_FS_RENDER_COMPONENTS(.dword = mrt_components));
OUT_REG(ring, A6XX_RB_RENDER_COMPONENTS(.dword = mrt_components));
return ring;
}
/* Build a stateobj emitting the current constant blend color as 32b
 * floats (RB_BLEND_{RED,GREEN,BLUE,ALPHA}_F32).
 */
static struct fd_ringbuffer *
build_blend_color(struct fd6_emit *emit)
assert_dt
{
struct fd_context *ctx = emit->ctx;
struct pipe_blend_color *bcolor = &ctx->blend_color;
/* 5 dwords: pkt header plus the four color registers: */
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(ctx->batch->submit,
5*4, FD_RINGBUFFER_STREAMING);
OUT_REG(ring,
A6XX_RB_BLEND_RED_F32(bcolor->color[0]),
A6XX_RB_BLEND_GREEN_F32(bcolor->color[1]),
A6XX_RB_BLEND_BLUE_F32(bcolor->color[2]),
A6XX_RB_BLEND_ALPHA_F32(bcolor->color[3])
);
return ring;
}
/* Build a stateobj binding the fragment shader's image/SSBO (IBO)
 * descriptors.  Only the FS can have IBOs on this path; the tess and
 * geometry stages are asserted to have none.
 */
static struct fd_ringbuffer *
build_ibo(struct fd6_emit *emit)
assert_dt
{
struct fd_context *ctx = emit->ctx;
if (emit->hs) {
debug_assert(ir3_shader_nibo(emit->hs) == 0);
debug_assert(ir3_shader_nibo(emit->ds) == 0);
}
if (emit->gs) {
debug_assert(ir3_shader_nibo(emit->gs) == 0);
}
/* Descriptor buffer holding the actual IBO records: */
struct fd_ringbuffer *ibo_state =
fd6_build_ibo_state(ctx, emit->fs, PIPE_SHADER_FRAGMENT);
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
ctx->batch->submit, 0x100, FD_RINGBUFFER_STREAMING);
/* Upload the IBO descriptors via an indirect CP_LOAD_STATE6: */
OUT_PKT7(ring, CP_LOAD_STATE6, 3);
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_IBO) |
CP_LOAD_STATE6_0_NUM_UNIT(ir3_shader_nibo(emit->fs)));
OUT_RB(ring, ibo_state);
/* ..and also point SP_IBO at the same descriptor buffer: */
OUT_PKT4(ring, REG_A6XX_SP_IBO, 2);
OUT_RB(ring, ibo_state);
/* TODO if we used CP_SET_DRAW_STATE for compute shaders, we could
 * de-duplicate this from program->config_stateobj
 */
OUT_PKT4(ring, REG_A6XX_SP_IBO_COUNT, 1);
OUT_RING(ring, ir3_shader_nibo(emit->fs));
/* drop our reference; presumably OUT_RB above keeps the descriptor
 * buffer alive from the parent ring — verify against fd_ringbuffer API
 */
fd_ringbuffer_del(ibo_state);
return ring;
}
static void
fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit,
struct ir3_stream_output_info *info)
fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit)
assert_dt
{
struct fd_context *ctx = emit->ctx;
const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
struct ir3_stream_output_info *info = &fd6_last_shader(prog)->shader->stream_output;
struct fd_streamout_stateobj *so = &ctx->streamout;
emit->streamout_mask = 0;
if (!info->num_outputs)
return;
for (unsigned i = 0; i < so->num_targets; i++) {
struct fd_stream_output_target *target = fd_stream_output_target(so->targets[i]);
@ -813,65 +962,15 @@ fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit,
ctx->last.streamout_mask = emit->streamout_mask;
}
void
fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
/**
* Stuff that changes less frequently and isn't (yet) moved into stategroups
*/
static void
fd6_emit_non_ring(struct fd_ringbuffer *ring, struct fd6_emit *emit)
assert_dt
{
struct fd_context *ctx = emit->ctx;
struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
const struct ir3_shader_variant *vs = emit->vs;
const struct ir3_shader_variant *hs = emit->hs;
const struct ir3_shader_variant *ds = emit->ds;
const struct ir3_shader_variant *gs = emit->gs;
const struct ir3_shader_variant *fs = emit->fs;
const enum fd_dirty_3d_state dirty = emit->dirty;
bool needs_border = false;
emit_marker6(ring, 5);
/* NOTE: we track fb_read differently than _BLEND_ENABLED since
* we might at some point decide to do sysmem in some cases when
* blend is enabled:
*/
if (fs->fb_read)
ctx->batch->gmem_reason |= FD_GMEM_FB_READ;
if (emit->dirty & FD_DIRTY_VTXSTATE) {
struct fd6_vertex_stateobj *vtx = fd6_vertex_stateobj(ctx->vtx.vtx);
fd6_emit_add_group(emit, vtx->stateobj, FD6_GROUP_VTXSTATE, ENABLE_ALL);
}
if (emit->dirty & FD_DIRTY_VTXBUF) {
struct fd_ringbuffer *state;
state = build_vbo_state(emit);
fd6_emit_take_group(emit, state, FD6_GROUP_VBO, ENABLE_ALL);
}
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER)) {
struct fd_ringbuffer *state =
fd6_zsa_state(ctx,
util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])),
fd_depth_clamp_enabled(ctx));
fd6_emit_add_group(emit, state, FD6_GROUP_ZSA, ENABLE_ALL);
}
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_BLEND | FD_DIRTY_PROG)) {
struct fd_ringbuffer *state;
state = build_lrz(emit, false);
if (state) {
fd6_emit_take_group(emit, state, FD6_GROUP_LRZ, ENABLE_DRAW);
}
state = build_lrz(emit, true);
if (state) {
fd6_emit_take_group(emit, state,
FD6_GROUP_LRZ_BINNING, CP_SET_DRAW_STATE__0_BINNING);
}
}
if (dirty & FD_DIRTY_STENCIL_REF) {
struct pipe_stencil_ref *sr = &ctx->stencil_ref;
@ -881,33 +980,6 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
A6XX_RB_STENCILREF_BFREF(sr->ref_value[1]));
}
/* NOTE: scissor enabled bit is part of rasterizer state, but
* fd_rasterizer_state_bind() will mark scissor dirty if needed:
*/
if (dirty & FD_DIRTY_SCISSOR) {
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
emit->ctx->batch->submit, 3*4, FD_RINGBUFFER_STREAMING);
struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
OUT_REG(ring,
A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0,
.x = scissor->minx,
.y = scissor->miny
),
A6XX_GRAS_SC_SCREEN_SCISSOR_BR(0,
.x = MAX2(scissor->maxx, 1) - 1,
.y = MAX2(scissor->maxy, 1) - 1
)
);
fd6_emit_take_group(emit, ring, FD6_GROUP_SCISSOR, ENABLE_ALL);
ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx);
ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny);
ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
}
if (dirty & FD_DIRTY_VIEWPORT) {
struct pipe_scissor_state *scissor = &ctx->viewport_scissor;
@ -962,154 +1034,139 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
A6XX_RB_Z_CLAMP_MIN(zmin),
A6XX_RB_Z_CLAMP_MAX(zmax));
}
}
if (dirty & FD_DIRTY_PROG) {
fd6_emit_add_group(emit, prog->config_stateobj, FD6_GROUP_PROG_CONFIG, ENABLE_ALL);
fd6_emit_add_group(emit, prog->stateobj, FD6_GROUP_PROG, ENABLE_DRAW);
fd6_emit_add_group(emit, prog->binning_stateobj,
FD6_GROUP_PROG_BINNING, CP_SET_DRAW_STATE__0_BINNING);
void
fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
{
struct fd_context *ctx = emit->ctx;
struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
const struct ir3_shader_variant *vs = emit->vs;
const struct ir3_shader_variant *hs = emit->hs;
const struct ir3_shader_variant *ds = emit->ds;
const struct ir3_shader_variant *gs = emit->gs;
const struct ir3_shader_variant *fs = emit->fs;
bool needs_border = false;
/* emit remaining streaming program state, ie. what depends on
* other emit state, so cannot be pre-baked.
*/
struct fd_ringbuffer *streaming = fd6_program_interp_state(emit);
emit_marker6(ring, 5);
fd6_emit_take_group(emit, streaming, FD6_GROUP_PROG_INTERP, ENABLE_DRAW);
}
/* NOTE: we track fb_read differently than _BLEND_ENABLED since we
* might decide to do sysmem in some cases when blend is enabled:
*/
if (fs->fb_read)
ctx->batch->gmem_reason |= FD_GMEM_FB_READ;
if (dirty & FD_DIRTY_RASTERIZER) {
struct fd_ringbuffer *stateobj =
fd6_rasterizer_state(ctx, emit->primitive_restart);
fd6_emit_add_group(emit, stateobj,
FD6_GROUP_RASTERIZER, ENABLE_ALL);
}
u_foreach_bit (b, emit->dirty_groups) {
enum fd6_state_id group = b;
struct fd_ringbuffer *state = NULL;
uint32_t enable_mask = ENABLE_ALL;
if (dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER_DISCARD |
FD_DIRTY_PROG | FD_DIRTY_BLEND_DUAL)) {
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
emit->ctx->batch->submit, 9 * 4, FD_RINGBUFFER_STREAMING);
unsigned nr = pfb->nr_cbufs;
if (ctx->rasterizer->rasterizer_discard)
nr = 0;
struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);
if (blend->use_dual_src_blend)
nr++;
OUT_PKT4(ring, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2);
OUT_RING(ring, COND(fs->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z) |
COND(fs->writes_smask && pfb->samples > 1,
A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_SAMPMASK) |
COND(fs->writes_stencilref,
A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_STENCILREF) |
COND(blend->use_dual_src_blend,
A6XX_RB_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
OUT_RING(ring, A6XX_RB_FS_OUTPUT_CNTL1_MRT(nr));
OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL1, 1);
OUT_RING(ring, A6XX_SP_FS_OUTPUT_CNTL1_MRT(nr));
unsigned mrt_components = 0;
for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
if (!pfb->cbufs[i])
switch (group) {
case FD6_GROUP_VTXSTATE:
state = fd6_vertex_stateobj(ctx->vtx.vtx)->stateobj;
fd_ringbuffer_ref(state);
break;
case FD6_GROUP_VBO:
state = build_vbo_state(emit);
break;
case FD6_GROUP_ZSA:
state = fd6_zsa_state(ctx,
util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])),
fd_depth_clamp_enabled(ctx));
fd_ringbuffer_ref(state);
break;
case FD6_GROUP_LRZ:
state = build_lrz(emit, false);
if (!state)
continue;
mrt_components |= 0xf << (i * 4);
enable_mask = ENABLE_DRAW;
break;
case FD6_GROUP_LRZ_BINNING:
state = build_lrz(emit, true);
if (!state)
continue;
enable_mask = CP_SET_DRAW_STATE__0_BINNING;
break;
case FD6_GROUP_SCISSOR:
state = build_scissor(emit);
break;
case FD6_GROUP_PROG:
fd6_emit_add_group(emit, prog->config_stateobj, FD6_GROUP_PROG_CONFIG, ENABLE_ALL);
fd6_emit_add_group(emit, prog->stateobj, FD6_GROUP_PROG, ENABLE_DRAW);
fd6_emit_add_group(emit, prog->binning_stateobj,
FD6_GROUP_PROG_BINNING, CP_SET_DRAW_STATE__0_BINNING);
/* emit remaining streaming program state, ie. what depends on
* other emit state, so cannot be pre-baked.
*/
fd6_emit_take_group(emit, fd6_program_interp_state(emit),
FD6_GROUP_PROG_INTERP, ENABLE_DRAW);
continue;
case FD6_GROUP_RASTERIZER:
state = fd6_rasterizer_state(ctx, emit->primitive_restart);
fd_ringbuffer_ref(state);
break;
case FD6_GROUP_PROG_FB_RAST:
state = build_prog_fb_rast(emit);
break;
case FD6_GROUP_BLEND:
state = fd6_blend_variant(ctx->blend, pfb->samples, ctx->sample_mask)->stateobj;
fd_ringbuffer_ref(state);
break;
case FD6_GROUP_BLEND_COLOR:
state = build_blend_color(emit);
break;
case FD6_GROUP_IBO:
state = build_ibo(emit);
fd6_emit_ibo_consts(emit, fs, PIPE_SHADER_FRAGMENT, ring);
break;
case FD6_GROUP_CONST:
state = fd6_build_user_consts(emit);
break;
case FD6_GROUP_VS_DRIVER_PARAMS:
state = fd6_build_vs_driver_params(emit);
break;
case FD6_GROUP_PRIMITIVE_PARAMS:
state = fd6_build_tess_consts(emit);
break;
case FD6_GROUP_VS_TEX:
needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_VERTEX, vs);
continue;
case FD6_GROUP_HS_TEX:
if (hs) {
needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_TESS_CTRL, hs);
}
continue;
case FD6_GROUP_DS_TEX:
if (ds) {
needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_TESS_EVAL, ds);
}
continue;
case FD6_GROUP_GS_TEX:
if (gs) {
needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_GEOMETRY, gs);
}
continue;
case FD6_GROUP_FS_TEX:
needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_FRAGMENT, fs);
continue;
case FD6_GROUP_SO:
fd6_emit_streamout(ring, emit);
continue;
case FD6_GROUP_NON_GROUP:
fd6_emit_non_ring(ring, emit);
continue;
default:
unreachable("bad state group");
}
/* dual source blending has an extra fs output in the 2nd slot */
if (blend->use_dual_src_blend)
mrt_components |= 0xf << 4;
mrt_components &= prog->mrt_components;
OUT_REG(ring, A6XX_SP_FS_RENDER_COMPONENTS(.dword = mrt_components));
OUT_REG(ring, A6XX_RB_RENDER_COMPONENTS(.dword = mrt_components));
fd6_emit_take_group(emit, ring, FD6_GROUP_PROG_FB_RAST, ENABLE_DRAW);
fd6_emit_take_group(emit, state, group, enable_mask);
}
fd6_emit_consts(emit);
struct ir3_stream_output_info *info = &fd6_last_shader(prog)->shader->stream_output;
if (info->num_outputs)
fd6_emit_streamout(ring, emit, info);
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_SAMPLE_MASK)) {
struct fd6_blend_variant *blend = fd6_blend_variant(ctx->blend,
pfb->samples, ctx->sample_mask);
fd6_emit_add_group(emit, blend->stateobj, FD6_GROUP_BLEND, ENABLE_DRAW);
}
if (dirty & FD_DIRTY_BLEND_COLOR) {
struct pipe_blend_color *bcolor = &ctx->blend_color;
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
emit->ctx->batch->submit, 5*4, FD_RINGBUFFER_STREAMING);
OUT_REG(ring,
A6XX_RB_BLEND_RED_F32(bcolor->color[0]),
A6XX_RB_BLEND_GREEN_F32(bcolor->color[1]),
A6XX_RB_BLEND_BLUE_F32(bcolor->color[2]),
A6XX_RB_BLEND_ALPHA_F32(bcolor->color[3])
);
fd6_emit_take_group(emit, ring, FD6_GROUP_BLEND_COLOR, ENABLE_DRAW);
}
needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_VERTEX, vs);
if (hs) {
needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_TESS_CTRL, hs);
needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_TESS_EVAL, ds);
}
if (gs) {
needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_GEOMETRY, gs);
}
needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_FRAGMENT, fs);
if (needs_border)
emit_border_color(ctx, ring);
if (hs) {
debug_assert(ir3_shader_nibo(hs) == 0);
debug_assert(ir3_shader_nibo(ds) == 0);
}
if (gs) {
debug_assert(ir3_shader_nibo(gs) == 0);
}
#define DIRTY_IBO (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE | \
FD_DIRTY_SHADER_PROG)
if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & DIRTY_IBO) {
struct fd_ringbuffer *state =
fd6_build_ibo_state(ctx, fs, PIPE_SHADER_FRAGMENT);
struct fd_ringbuffer *obj = fd_submit_new_ringbuffer(
ctx->batch->submit, 0x100, FD_RINGBUFFER_STREAMING);
OUT_PKT7(obj, CP_LOAD_STATE6, 3);
OUT_RING(obj, CP_LOAD_STATE6_0_DST_OFF(0) |
CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_IBO) |
CP_LOAD_STATE6_0_NUM_UNIT(ir3_shader_nibo(fs)));
OUT_RB(obj, state);
OUT_PKT4(obj, REG_A6XX_SP_IBO, 2);
OUT_RB(obj, state);
/* TODO if we used CP_SET_DRAW_STATE for compute shaders, we could
* de-duplicate this from program->config_stateobj
*/
OUT_PKT4(obj, REG_A6XX_SP_IBO_COUNT, 1);
OUT_RING(obj, ir3_shader_nibo(fs));
fd6_emit_ibo_consts(emit, fs, PIPE_SHADER_FRAGMENT, ring);
fd6_emit_take_group(emit, obj, FD6_GROUP_IBO, ENABLE_DRAW);
fd_ringbuffer_del(state);
}
if (emit->num_groups > 0) {
OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * emit->num_groups);
for (unsigned i = 0; i < emit->num_groups; i++) {

View File

@ -60,13 +60,14 @@ enum fd6_state_id {
FD6_GROUP_DS_TEX,
FD6_GROUP_GS_TEX,
FD6_GROUP_FS_TEX,
FD6_GROUP_IBO,
FD6_GROUP_RASTERIZER,
FD6_GROUP_ZSA,
FD6_GROUP_BLEND,
FD6_GROUP_SCISSOR,
FD6_GROUP_BLEND_COLOR,
FD6_GROUP_SO,
FD6_GROUP_IBO,
FD6_GROUP_NON_GROUP, /* placeholder group for state emit in IB2, keep last */
};
#define ENABLE_ALL (CP_SET_DRAW_STATE__0_BINNING | CP_SET_DRAW_STATE__0_GMEM | CP_SET_DRAW_STATE__0_SYSMEM)
@ -90,6 +91,7 @@ struct fd6_emit {
const struct pipe_draw_start_count *draw;
struct ir3_cache_key key;
enum fd_dirty_3d_state dirty;
uint32_t dirty_groups;
uint32_t sprite_coord_enable; /* bitmask */
bool sprite_coord_mode;