diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index d315f40ed20..32aa61f70fc 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -121,13 +121,18 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) vc4_emit_state(pctx); /* the actual draw call. */ - uint32_t nr_attributes = 1; + struct vc4_vertex_stateobj *vtx = vc4->vtx; + struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf; cl_u8(&vc4->bcl, VC4_PACKET_GL_SHADER_STATE); + assert(vtx->num_elements <= 8); #ifndef USE_VC4_SIMULATOR - cl_u32(&vc4->bcl, nr_attributes & 0x7); /* offset into shader_rec */ + /* Note that number of attributes == 0 in the packet means 8 + * attributes. This field also contains the offset into shader_rec. + */ + cl_u32(&vc4->bcl, vtx->num_elements & 0x7); #else cl_u32(&vc4->bcl, simpenrose_hw_addr(vc4->shader_rec.next) | - (nr_attributes & 0x7)); + (vtx->num_elements & 0x7)); #endif /* Note that the primitive type fields match with OpenGL/gallium @@ -174,7 +179,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) &vc4->constbuf[PIPE_SHADER_VERTEX], 1, &cs_ubo, &cs_ubo_offset); - cl_start_shader_reloc(&vc4->shader_rec, 7); + cl_start_shader_reloc(&vc4->shader_rec, 6 + vtx->num_elements); cl_u16(&vc4->shader_rec, VC4_SHADER_FLAG_ENABLE_CLIPPING); cl_u8(&vc4->shader_rec, 0); /* fs num uniforms (unused) */ cl_u8(&vc4->shader_rec, vc4->prog.fs->num_inputs); @@ -182,35 +187,36 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) cl_reloc(vc4, &vc4->shader_rec, fs_ubo, fs_ubo_offset); cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */ - cl_u8(&vc4->shader_rec, 1); /* vs attribute array bitfield */ - cl_u8(&vc4->shader_rec, 16); /* vs total attribute size */ + cl_u8(&vc4->shader_rec, (1 << vtx->num_elements) - 1); /* vs attribute array bitfield */ + cl_u8(&vc4->shader_rec, 16 * vtx->num_elements); /* vs total attribute size */ 
cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0); cl_reloc(vc4, &vc4->shader_rec, vs_ubo, vs_ubo_offset); cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */ - cl_u8(&vc4->shader_rec, 1); /* cs attribute array bitfield */ - cl_u8(&vc4->shader_rec, 16); /* vs total attribute size */ + cl_u8(&vc4->shader_rec, (1 << vtx->num_elements) - 1); /* cs attribute array bitfield */ + cl_u8(&vc4->shader_rec, 16 * vtx->num_elements); /* cs total attribute size */ cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, vc4->prog.vs->coord_shader_offset); cl_reloc(vc4, &vc4->shader_rec, cs_ubo, cs_ubo_offset); - struct vc4_vertex_stateobj *vtx = vc4->vtx; - struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf; for (int i = 0; i < vtx->num_elements; i++) { struct pipe_vertex_element *elem = &vtx->pipe[i]; struct pipe_vertex_buffer *vb = &vertexbuf->vb[elem->vertex_buffer_index]; struct vc4_resource *rsc = vc4_resource(vb->buffer); + if (elem->src_format != PIPE_FORMAT_R32G32B32A32_FLOAT) { + fprintf(stderr, "Unsupported attribute format %s\n", + util_format_name(elem->src_format)); + } + cl_reloc(vc4, &vc4->shader_rec, rsc->bo, vb->buffer_offset + elem->src_offset); cl_u8(&vc4->shader_rec, util_format_get_blocksize(elem->src_format) - 1); cl_u8(&vc4->shader_rec, vb->stride); - cl_u8(&vc4->shader_rec, 0); /* VS VPM offset */ - cl_u8(&vc4->shader_rec, 0); /* CS VPM offset */ - - break; /* XXX: just the 1 for now. 
*/ + cl_u8(&vc4->shader_rec, i * 16); /* VS VPM offset */ + cl_u8(&vc4->shader_rec, i * 16); /* CS VPM offset */ } diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 1cf518dceb2..3f52588ada8 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -53,7 +53,6 @@ struct tgsi_to_qir { uint32_t *uniform_data; enum quniform_contents *uniform_contents; uint32_t num_uniforms; - uint32_t num_inputs; uint32_t num_outputs; }; @@ -351,25 +350,28 @@ emit_tgsi_declaration(struct tgsi_to_qir *trans, switch (decl->Declaration.File) { case TGSI_FILE_INPUT: - if (c->stage == QSTAGE_FRAG) { - for (int index = decl->Range.First; - index <= decl->Range.Last; - index++) { - for (int i = 0; i < 4; i++) { - struct qreg vary = { - QFILE_VARY, - index * 4 + i - }; + for (int i = decl->Range.First * 4; + i < (decl->Range.Last + 1) * 4; + i++) { + if (c->stage == QSTAGE_FRAG) { + struct qreg vary = { + QFILE_VARY, + i + }; - /* XXX: multiply by W */ - trans->inputs[index * 4 + i] = - qir_VARY_ADD_C(c, - qir_MOV(c, - vary)); - - trans->num_inputs++; - } + trans->inputs[i] = + qir_VARY_ADD_C(c, qir_MOV(c, vary)); + } else { + struct qreg dst = qir_get_temp(c); + /* XXX: attribute type/size/count */ + qir_emit(c, qir_inst(QOP_VPM_READ, + dst, + c->undef, + c->undef)); + trans->inputs[i] = dst; } + + c->num_inputs++; } break; } @@ -466,37 +468,6 @@ parse_tgsi_immediate(struct tgsi_to_qir *trans, struct tgsi_full_immediate *imm) } } -static void -emit_frag_init(struct tgsi_to_qir *trans) -{ -} - -static void -emit_vert_init(struct tgsi_to_qir *trans) -{ - struct qcompile *c = trans->c; - - /* XXX: attribute type/size/count */ - for (int i = 0; i < 4; i++) { - trans->inputs[i] = qir_get_temp(c); - qir_emit(c, qir_inst(QOP_VPM_READ, trans->inputs[i], - c->undef, c->undef)); - } -} - -static void -emit_coord_init(struct tgsi_to_qir *trans) -{ - struct qcompile *c = trans->c; - - /* XXX: attribute type/size/count 
*/ - for (int i = 0; i < 4; i++) { - trans->inputs[i] = qir_get_temp(c); - qir_emit(c, qir_inst(QOP_VPM_READ, trans->inputs[i], - c->undef, c->undef)); - } -} - static void emit_frag_end(struct tgsi_to_qir *trans) { @@ -619,15 +590,12 @@ vc4_shader_tgsi_to_qir(struct vc4_compiled_shader *shader, enum qstage stage, switch (stage) { case QSTAGE_FRAG: trans->fs_key = (struct vc4_fs_key *)key; - emit_frag_init(trans); break; case QSTAGE_VERT: trans->vs_key = (struct vc4_vs_key *)key; - emit_vert_init(trans); break; case QSTAGE_COORD: trans->vs_key = (struct vc4_vs_key *)key; - emit_coord_init(trans); break; } @@ -721,7 +689,7 @@ vc4_fs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader, { struct tgsi_to_qir *trans = vc4_shader_tgsi_to_qir(shader, QSTAGE_FRAG, &key->base); - shader->num_inputs = trans->num_inputs; + shader->num_inputs = trans->c->num_inputs; copy_uniform_state_to_shader(shader, 0, trans); shader->bo = vc4_bo_alloc_mem(vc4->screen, trans->c->qpu_insts, trans->c->qpu_inst_count * sizeof(uint64_t), diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index ff222e4aeff..491748ff05d 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -132,6 +132,7 @@ struct qcompile { uint64_t *qpu_insts; uint32_t qpu_inst_count; uint32_t qpu_inst_size; + uint32_t num_inputs; }; struct qcompile *qir_compile_init(void); diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 6b047023e56..fff086e26e9 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -218,7 +218,9 @@ vc4_generate_code(struct qcompile *c) switch (c->stage) { case QSTAGE_VERT: case QSTAGE_COORD: - queue(c, qpu_load_imm_ui(qpu_vrsetup(), 0x00401a00)); + queue(c, qpu_load_imm_ui(qpu_vrsetup(), + (0x00001a00 + + 0x00100000 * c->num_inputs))); queue(c, qpu_load_imm_ui(qpu_vwsetup(), 0x00001a00)); break; case QSTAGE_FRAG: