freedreno/a3xx+a4xx: add support for vtxcnt semantic

This will be used to implement stream-out (transform feedback).

Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
Rob Clark 2015-07-24 17:07:23 -04:00
parent 1b1ef6b457
commit 65d36a109a
4 changed files with 31 additions and 14 deletions

View File

@ -393,7 +393,9 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
uint32_t total_in = 0;
const struct fd_vertex_state *vtx = emit->vtx;
struct ir3_shader_variant *vp = fd3_emit_get_vp(emit);
unsigned vertex_regid = regid(63, 0), instance_regid = regid(63, 0);
unsigned vertex_regid = regid(63, 0);
unsigned instance_regid = regid(63, 0);
unsigned vtxcnt_regid = regid(63, 0);
for (i = 0; i < vp->inputs_count; i++) {
uint8_t semantic = sem2name(vp->inputs[i].semantic);
@ -401,14 +403,17 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
vertex_regid = vp->inputs[i].regid;
else if (semantic == TGSI_SEMANTIC_INSTANCEID)
instance_regid = vp->inputs[i].regid;
else if (semantic == IR3_SEMANTIC_VTXCNT)
vtxcnt_regid = vp->inputs[i].regid;
else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask)
last = i;
}
/* hw doesn't like to be configured for zero vbo's, it seems: */
if (vtx->vtx->num_elements == 0 &&
vertex_regid == regid(63, 0) &&
instance_regid == regid(63, 0))
if ((vtx->vtx->num_elements == 0) &&
(vertex_regid == regid(63, 0)) &&
(instance_regid == regid(63, 0)) &&
(vtxcnt_regid == regid(63, 0)))
return;
for (i = 0, j = 0; i <= last; i++) {
@ -421,8 +426,9 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
enum pipe_format pfmt = elem->src_format;
enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt);
bool switchnext = (i != last) ||
vertex_regid != regid(63, 0) ||
instance_regid != regid(63, 0);
(vertex_regid != regid(63, 0)) ||
(instance_regid != regid(63, 0)) ||
(vtxcnt_regid != regid(63, 0));
bool isint = util_format_is_pure_integer(pfmt);
uint32_t fs = util_format_get_blocksize(pfmt);
@ -461,6 +467,10 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
A3XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
A3XX_VFD_CONTROL_1_REGID4INST(instance_regid));
OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(vtxcnt_regid));
}
void

View File

@ -449,10 +449,6 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
OUT_RING(ring, flatshade[1]); /* SP_FS_FLAT_SHAD_MODE_REG_1 */
}
OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(252));
if (vpbuffer == BUFFER)
emit_shader(ring, vp);

View File

@ -251,7 +251,9 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
uint32_t total_in = 0;
const struct fd_vertex_state *vtx = emit->vtx;
struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
unsigned vertex_regid = regid(63, 0), instance_regid = regid(63, 0);
unsigned vertex_regid = regid(63, 0);
unsigned instance_regid = regid(63, 0);
unsigned vtxcnt_regid = regid(63, 0);
for (i = 0; i < vp->inputs_count; i++) {
uint8_t semantic = sem2name(vp->inputs[i].semantic);
@ -259,6 +261,8 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
vertex_regid = vp->inputs[i].regid;
else if (semantic == TGSI_SEMANTIC_INSTANCEID)
instance_regid = vp->inputs[i].regid;
else if (semantic == IR3_SEMANTIC_VTXCNT)
vtxcnt_regid = vp->inputs[i].regid;
else if ((i < vtx->vtx->num_elements) && vp->inputs[i].compmask)
last = i;
}
@ -266,7 +270,8 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
/* hw doesn't like to be configured for zero vbo's, it seems: */
if ((vtx->vtx->num_elements == 0) &&
(vertex_regid == regid(63, 0)) &&
(instance_regid == regid(63, 0)))
(instance_regid == regid(63, 0)) &&
(vtxcnt_regid == regid(63, 0)))
return;
for (i = 0, j = 0; i <= last; i++) {
@ -280,7 +285,8 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt);
bool switchnext = (i != last) ||
(vertex_regid != regid(63, 0)) ||
(instance_regid != regid(63, 0));
(instance_regid != regid(63, 0)) ||
(vtxcnt_regid != regid(63, 0));
bool isint = util_format_is_pure_integer(pfmt);
uint32_t fs = util_format_get_blocksize(pfmt);
uint32_t off = vb->buffer_offset + elem->src_offset;
@ -321,7 +327,7 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
A4XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
A4XX_VFD_CONTROL_1_REGID4INST(instance_regid));
OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_2 */
OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(regid(63, 0)));
OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(vtxcnt_regid));
OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_4 */
/* cache invalidate, otherwise vertex fetch could see

View File

@ -34,6 +34,11 @@
#include "ir3.h"
#include "disasm.h"
/* Internal semantic used for passing vtxcnt to the vertex shader in
 * order to implement transform feedback.
 *
 * The value is defined relative to TGSI_SEMANTIC_COUNT.
 * NOTE(review): presumably this keeps it from colliding with any of the
 * TGSI_SEMANTIC_* names, and the "+ 0" leaves room for further internal
 * semantics at "+ 1", "+ 2", ... -- confirm against the TGSI headers.
 */
#define IR3_SEMANTIC_VTXCNT (TGSI_SEMANTIC_COUNT + 0)
typedef uint16_t ir3_semantic; /* semantic name + index */
static inline ir3_semantic
ir3_semantic_name(uint8_t name, uint16_t index)