diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 9032366b748..49b3d97cce8 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -393,7 +393,9 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) uint32_t total_in = 0; const struct fd_vertex_state *vtx = emit->vtx; struct ir3_shader_variant *vp = fd3_emit_get_vp(emit); - unsigned vertex_regid = regid(63, 0), instance_regid = regid(63, 0); + unsigned vertex_regid = regid(63, 0); + unsigned instance_regid = regid(63, 0); + unsigned vtxcnt_regid = regid(63, 0); for (i = 0; i < vp->inputs_count; i++) { uint8_t semantic = sem2name(vp->inputs[i].semantic); @@ -401,14 +403,17 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) vertex_regid = vp->inputs[i].regid; else if (semantic == TGSI_SEMANTIC_INSTANCEID) instance_regid = vp->inputs[i].regid; + else if (semantic == IR3_SEMANTIC_VTXCNT) + vtxcnt_regid = vp->inputs[i].regid; else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) last = i; } /* hw doesn't like to be configured for zero vbo's, it seems: */ - if (vtx->vtx->num_elements == 0 && - vertex_regid == regid(63, 0) && - instance_regid == regid(63, 0)) + if ((vtx->vtx->num_elements == 0) && + (vertex_regid == regid(63, 0)) && + (instance_regid == regid(63, 0)) && + (vtxcnt_regid == regid(63, 0))) return; for (i = 0, j = 0; i <= last; i++) { @@ -421,8 +426,9 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) enum pipe_format pfmt = elem->src_format; enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt); bool switchnext = (i != last) || - vertex_regid != regid(63, 0) || - instance_regid != regid(63, 0); + (vertex_regid != regid(63, 0)) || + (instance_regid != regid(63, 0)) || + (vtxcnt_regid != regid(63, 0)); bool isint = util_format_is_pure_integer(pfmt); uint32_t fs = util_format_get_blocksize(pfmt); @@ -461,6 +467,10 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX A3XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) | A3XX_VFD_CONTROL_1_REGID4INST(instance_regid)); + + OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1); + OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) | + A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(vtxcnt_regid)); } void diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 9bf42f5b931..7cd4885ab4f 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -449,10 +449,6 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, OUT_RING(ring, flatshade[1]); /* SP_FS_FLAT_SHAD_MODE_REG_1 */ } - OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1); - OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) | - A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(252)); - if (vpbuffer == BUFFER) emit_shader(ring, vp); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 4462a82777f..62a1e9ee955 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -251,7 +251,9 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) uint32_t total_in = 0; const struct fd_vertex_state *vtx = emit->vtx; struct ir3_shader_variant *vp = fd4_emit_get_vp(emit); - unsigned vertex_regid = regid(63, 0), instance_regid = regid(63, 0); + unsigned vertex_regid = regid(63, 0); + unsigned instance_regid = regid(63, 0); + unsigned vtxcnt_regid = regid(63, 0); for (i = 0; i < vp->inputs_count; i++) { uint8_t semantic = sem2name(vp->inputs[i].semantic); @@ -259,6 +261,8 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) vertex_regid = vp->inputs[i].regid; else if (semantic == TGSI_SEMANTIC_INSTANCEID) instance_regid = vp->inputs[i].regid; + else if (semantic == IR3_SEMANTIC_VTXCNT) + vtxcnt_regid = vp->inputs[i].regid; else if ((i < vtx->vtx->num_elements) && vp->inputs[i].compmask) last = i; } @@ -266,7 +270,8 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) /* hw doesn't like to be configured for zero vbo's, it seems: */ if ((vtx->vtx->num_elements == 0) && (vertex_regid == regid(63, 0)) && - (instance_regid == regid(63, 0))) + (instance_regid == regid(63, 0)) && + (vtxcnt_regid == regid(63, 0))) return; for (i = 0, j = 0; i <= last; i++) { @@ -280,7 +285,8 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt); bool switchnext = (i != last) || (vertex_regid != regid(63, 0)) || - (instance_regid != regid(63, 0)); + (instance_regid != regid(63, 0)) || + (vtxcnt_regid != regid(63, 0)); bool isint = util_format_is_pure_integer(pfmt); uint32_t fs = util_format_get_blocksize(pfmt); uint32_t off = vb->buffer_offset + elem->src_offset; @@ -321,7 +327,7 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) A4XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) | A4XX_VFD_CONTROL_1_REGID4INST(instance_regid)); OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_2 */ - OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(regid(63, 0))); + OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(vtxcnt_regid)); OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_4 */ /* cache invalidate, otherwise vertex fetch could see diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index ef16d7b2f6e..f0af4478109 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -34,6 +34,11 @@ #include "ir3.h" #include "disasm.h" +/* internal semantic used for passing vtxcnt to vertex shader to + * implement transform feedback: + */ +#define IR3_SEMANTIC_VTXCNT (TGSI_SEMANTIC_COUNT + 0) + typedef uint16_t ir3_semantic; /* semantic name + index */ static inline ir3_semantic ir3_semantic_name(uint8_t name, uint16_t index)